Merge tag 'usb-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
Pull usb/phy/extcon updates from Greg KH:
"Here is the big USB, and PHY, and extcon, patchsets for 4.9-rc1.
Full details are in the shortlog, but generally a lot of new hardware
support, usb gadget updates, and Wolfram's great cleanup of USB error
message handling, making the kernel image a tad bit smaller.
All of this has been in linux-next with no reported issues"
* tag 'usb-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb: (343 commits)
Revert "usbtmc: convert to devm_kzalloc"
USB: serial: cp210x: Add ID for a Juniper console
usb: Kconfig: using select for USB_COMMON dependency
bluetooth: bcm203x: don't print error when allocating urb fails
mmc: host: vub300: don't print error when allocating urb fails
usb: hub: change CLEAR_FEATURE to SET_FEATURE
usb: core: Introduce a USB port LED trigger
USB: bcma: drop Northstar PHY 2.0 initialization code
usb: core: hcd: add missing header dependencies
usb: musb: da8xx: fix error handling message in probe
usb: musb: Fix session based PM for first invalid VBUS
usb: musb: Fix PM runtime for disconnect after unconfigure
musb: Export musb_root_disconnect for use in modules
usb: misc: legousbtower: Fix NULL pointer deference
cdc-acm: hardening against malicious devices
Revert "usb: gadget: NCM: Protect dev->port_usb using dev->lock"
include: extcon: Fix compilation error caused because of incomplete merge
MAINTAINERS: add tree entry for USB Serial
phy-twl4030-usb: initialize charging-related stuff via pm_runtime
phy-twl4030-usb: better handle musb_mailbox() failure
...
diff --git a/.mailmap b/.mailmap
index de22dae..1dab0a1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -69,6 +69,7 @@
James Bottomley <jejb@titanic.il.steeleye.com>
James E Wilson <wilson@specifix.com>
James Ketrenos <jketreno@io.(none)>
+Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
Jean Tourrilhes <jt@hpl.hp.com>
Jeff Garzik <jgarzik@pretzel.yyz.us>
diff --git a/CREDITS b/CREDITS
index 2a3fbcd..0367a4b 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1944,6 +1944,11 @@
E: kraxel@suse.de
D: video4linux, bttv, vesafb, some scsi, misc fixes
+N: Hans J. Koch
+D: USERSPACE I/O, MAX6650
+D: Hans passed away in June 2016, and will be greatly missed.
+W: https://lwn.net/Articles/691000/
+
N: Harald Koenig
E: koenig@tat.physik.uni-tuebingen.de
D: XFree86 (S3), DCF77, some kernel hacks and fixes
@@ -3691,7 +3696,7 @@
N: Geert Uytterhoeven
E: geert@linux-m68k.org
W: http://users.telenet.be/geertu/
-P: 1024/862678A6 C51D 361C 0BD1 4C90 B275 C553 6EEA 11BA 8626 78A6
+P: 4096R/4804B4BC3F55EEFB 750D 82B0 A781 5431 5E25 925B 4804 B4BC 3F55 EEFB
D: m68k/Amiga and PPC/CHRP Longtrail coordinator
D: Frame buffer device and XF68_FBDev maintainer
D: m68k IDE maintainer
diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt b/Documentation/ABI/testing/sysfs-class-mic.txt
index d45eed2..6ef6826 100644
--- a/Documentation/ABI/testing/sysfs-class-mic.txt
+++ b/Documentation/ABI/testing/sysfs-class-mic.txt
@@ -153,7 +153,7 @@
What: /sys/class/mic/mic(x)/heartbeat_enable
Date: March 2015
-KernelVersion: 3.20
+KernelVersion: 4.4
Contact: Ashutosh Dixit <ashutosh.dixit@intel.com>
Description:
The MIC drivers detect and inform user space about card crashes
diff --git a/Documentation/ABI/testing/sysfs-i2c-bmp085 b/Documentation/ABI/testing/sysfs-i2c-bmp085
deleted file mode 100644
index 585962a..0000000
--- a/Documentation/ABI/testing/sysfs-i2c-bmp085
+++ /dev/null
@@ -1,31 +0,0 @@
-What: /sys/bus/i2c/devices/<busnum>-<devaddr>/pressure0_input
-Date: June 2010
-Contact: Christoph Mair <christoph.mair@gmail.com>
-Description: Start a pressure measurement and read the result. Values
- represent the ambient air pressure in pascal (0.01 millibar).
-
- Reading: returns the current air pressure.
-
-
-What: /sys/bus/i2c/devices/<busnum>-<devaddr>/temp0_input
-Date: June 2010
-Contact: Christoph Mair <christoph.mair@gmail.com>
-Description: Measure the ambient temperature. The returned value represents
- the ambient temperature in units of 0.1 degree celsius.
-
- Reading: returns the current temperature.
-
-
-What: /sys/bus/i2c/devices/<busnum>-<devaddr>/oversampling
-Date: June 2010
-Contact: Christoph Mair <christoph.mair@gmail.com>
-Description: Tell the bmp085 to use more samples to calculate a pressure
- value. When writing to this file the chip will use 2^x samples
- to calculate the next pressure value with x being the value
- written. Using this feature will decrease RMS noise and
- increase the measurement time.
-
- Reading: returns the current oversampling setting.
-
- Writing: sets a new oversampling setting.
- Accepted values: 0..3.
diff --git a/Documentation/ABI/testing/sysfs-kernel-irq b/Documentation/ABI/testing/sysfs-kernel-irq
new file mode 100644
index 0000000..eb074b1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-irq
@@ -0,0 +1,53 @@
+What: /sys/kernel/irq
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: Directory containing information about the system's IRQs.
+ Specifically, data from the associated struct irq_desc.
+ The information here is similar to that in /proc/interrupts
+ but in a more machine-friendly format. This directory contains
+ one subdirectory for each Linux IRQ number.
+
+What: /sys/kernel/irq/<irq>/actions
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: The IRQ action chain. A comma-separated list of zero or more
+ device names associated with this interrupt.
+
+What: /sys/kernel/irq/<irq>/chip_name
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: Human-readable chip name supplied by the associated device
+ driver.
+
+What: /sys/kernel/irq/<irq>/hwirq
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: When interrupt translation domains are used, this file contains
+ the underlying hardware IRQ number used for this Linux IRQ.
+
+What: /sys/kernel/irq/<irq>/name
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: Human-readable flow handler name as defined by the irq chip
+ driver.
+
+What: /sys/kernel/irq/<irq>/per_cpu_count
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: The number of times the interrupt has fired since boot. This
+ is a comma-separated list of counters; one per CPU in CPU id
+ order. NOTE: This file consistently shows counters for all
+ CPU ids. This differs from the behavior of /proc/interrupts
+ which only shows counters for online CPUs.
+
+What: /sys/kernel/irq/<irq>/type
+Date: September 2016
+KernelVersion: 4.9
+Contact: Craig Gallek <kraig@google.com>
+Description: The type of the interrupt. Either the string 'level' or 'edge'.
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index ece410f..a4d3838 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2493,6 +2493,28 @@
variant of <tt>call_rcu()</tt> that might one day be created for
energy-efficiency purposes.
+<p>
+That said, there are limits.
+RCU requires that the <tt>rcu_head</tt> structure be aligned to a
+two-byte boundary, and passing a misaligned <tt>rcu_head</tt>
+structure to one of the <tt>call_rcu()</tt> family of functions
+will result in a splat.
+It is therefore necessary to exercise caution when packing
+structures containing fields of type <tt>rcu_head</tt>.
+Why not a four-byte or even eight-byte alignment requirement?
+Because the m68k architecture provides only two-byte alignment,
+and thus acts as alignment's least common denominator.
+
+<p>
+The reason for reserving the bottom bit of pointers to
+<tt>rcu_head</tt> structures is to leave the door open to
+“lazy” callbacks whose invocations can safely be deferred.
+Deferring invocation could potentially have energy-efficiency
+benefits, but only if the rate of non-lazy callbacks decreases
+significantly for some important workload.
+In the meantime, reserving the bottom bit keeps this option open
+in case it one day becomes useful.
+
<h3><a name="Performance, Scalability, Response Time, and Reliability">
Performance, Scalability, Response Time, and Reliability</a></h3>
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 118e7c1..278f6a9 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -10,21 +10,6 @@
command (perhaps grepping for "torture"). The test is started
when the module is loaded, and stops when the module is unloaded.
-CONFIG_RCU_TORTURE_TEST_RUNNABLE
-
-It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will
-result in the tests being loaded into the base kernel. In this case,
-the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify
-whether the RCU torture tests are to be started immediately during
-boot or whether the /proc/sys/kernel/rcutorture_runnable file is used
-to enable them. This /proc file can be used to repeatedly pause and
-restart the tests, regardless of the initial state specified by the
-CONFIG_RCU_TORTURE_TEST_RUNNABLE config option.
-
-You will normally -not- want to start the RCU torture tests during boot
-(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing
-this can sometimes be useful in finding boot-time bugs.
-
MODULE PARAMETERS
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
new file mode 100644
index 0000000..effe7af
--- /dev/null
+++ b/Documentation/acpi/acpi-lid.txt
@@ -0,0 +1,96 @@
+Special Usage Model of the ACPI Control Method Lid Device
+
+Copyright (C) 2016, Intel Corporation
+Author: Lv Zheng <lv.zheng@intel.com>
+
+
+Abstract:
+
+Platforms containing lids convey lid state (open/close) to OSPMs using a
+control method lid device. To implement this, the AML tables issue
+Notify(lid_device, 0x80) to notify the OSPMs whenever the lid state has
+changed. The _LID control method for the lid device must be implemented to
+report the "current" state of the lid as either "opened" or "closed".
+
+For most platforms, both the _LID method and the lid notifications are
+reliable. However, there are exceptions. In order to work with these
+exceptional buggy platforms, special restrictions and expections should be
+taken into account. This document describes the restrictions and the
+expections of the Linux ACPI lid device driver.
+
+
+1. Restrictions of the returning value of the _LID control method
+
+The _LID control method is described to return the "current" lid state.
+However the word of "current" has ambiguity, some buggy AML tables return
+the lid state upon the last lid notification instead of returning the lid
+state upon the last _LID evaluation. There won't be difference when the
+_LID control method is evaluated during the runtime, the problem is its
+initial returning value. When the AML tables implement this control method
+with cached value, the initial returning value is likely not reliable.
+There are platforms always retun "closed" as initial lid state.
+
+2. Restrictions of the lid state change notifications
+
+There are buggy AML tables never notifying when the lid device state is
+changed to "opened". Thus the "opened" notification is not guaranteed. But
+it is guaranteed that the AML tables always notify "closed" when the lid
+state is changed to "closed". The "closed" notification is normally used to
+trigger some system power saving operations on Windows. Since it is fully
+tested, it is reliable from all AML tables.
+
+3. Expections for the userspace users of the ACPI lid device driver
+
+The ACPI button driver exports the lid state to the userspace via the
+following file:
+ /proc/acpi/button/lid/LID0/state
+This file actually calls the _LID control method described above. And given
+the previous explanation, it is not reliable enough on some platforms. So
+it is advised for the userspace program to not to solely rely on this file
+to determine the actual lid state.
+
+The ACPI button driver emits the following input event to the userspace:
+ SW_LID
+The ACPI lid device driver is implemented to try to deliver the platform
+triggered events to the userspace. However, given the fact that the buggy
+firmware cannot make sure "opened"/"closed" events are paired, the ACPI
+button driver uses the following 3 modes in order not to trigger issues.
+
+If the userspace hasn't been prepared to ignore the unreliable "opened"
+events and the unreliable initial state notification, Linux users can use
+the following kernel parameters to handle the possible issues:
+A. button.lid_init_state=method:
+ When this option is specified, the ACPI button driver reports the
+ initial lid state using the returning value of the _LID control method
+ and whether the "opened"/"closed" events are paired fully relies on the
+ firmware implementation.
+ This option can be used to fix some platforms where the returning value
+ of the _LID control method is reliable but the initial lid state
+ notification is missing.
+ This option is the default behavior during the period the userspace
+ isn't ready to handle the buggy AML tables.
+B. button.lid_init_state=open:
+ When this option is specified, the ACPI button driver always reports the
+ initial lid state as "opened" and whether the "opened"/"closed" events
+ are paired fully relies on the firmware implementation.
+ This may fix some platforms where the returning value of the _LID
+ control method is not reliable and the initial lid state notification is
+ missing.
+
+If the userspace has been prepared to ignore the unreliable "opened" events
+and the unreliable initial state notification, Linux users should always
+use the following kernel parameter:
+C. button.lid_init_state=ignore:
+ When this option is specified, the ACPI button driver never reports the
+ initial lid state and there is a compensation mechanism implemented to
+ ensure that the reliable "closed" notifications can always be delievered
+ to the userspace by always pairing "closed" input events with complement
+ "opened" input events. But there is still no guarantee that the "opened"
+ notifications can be delivered to the userspace when the lid is actually
+ opens given that some AML tables do not send "opened" notifications
+ reliably.
+ In this mode, if everything is correctly implemented by the platform
+ firmware, the old userspace programs should still work. Otherwise, the
+ new userspace programs are required to work with the ACPI button driver.
+ This option will be the default behavior after the userspace is ready to
+ handle the buggy AML tables.
diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt
index f35dad1..5aafe0b3 100644
--- a/Documentation/acpi/gpio-properties.txt
+++ b/Documentation/acpi/gpio-properties.txt
@@ -28,8 +28,8 @@
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package ()
{
- Package () {"reset-gpio", Package() {^BTH, 1, 1, 0 }},
- Package () {"shutdown-gpio", Package() {^BTH, 0, 0, 0 }},
+ Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
+ Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
}
})
}
@@ -48,7 +48,7 @@
active low or high, the "active_low" argument can be used here. Setting
it to 1 marks the GPIO as active low.
-In our Bluetooth example the "reset-gpio" refers to the second GpioIo()
+In our Bluetooth example the "reset-gpios" refers to the second GpioIo()
resource, second pin in that resource with the GPIO number of 31.
ACPI GPIO Mappings Provided by Drivers
@@ -83,8 +83,8 @@
static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
- { "reset-gpio", &reset_gpio, 1 },
- { "shutdown-gpio", &shutdown_gpio, 1 },
+ { "reset-gpios", &reset_gpio, 1 },
+ { "shutdown-gpios", &shutdown_gpio, 1 },
{ },
};
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index ccc6032..405da11 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -61,3 +61,5 @@
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
| Cavium | ThunderX SMMUv2 | #27704 | N/A |
+| | | | |
+| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index e774128..ef5fbe9 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -25,6 +25,12 @@
- always-on : a boolean property. If present, the timer is powered through an
always-on power domain, therefore it never loses context.
+- fsl,erratum-a008585 : A boolean property. Indicates the presence of
+ QorIQ erratum A-008585, which says that reading the counter is
+ unreliable unless the same value is returned by back-to-back reads.
+ This also affects writes to the tval register, due to the implicit
+ counter read.
+
** Optional properties:
- arm,cpu-registers-not-fw-configured : Firmware does not initialize
diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
new file mode 100644
index 0000000..f223313
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
@@ -0,0 +1,19 @@
+
+* Rockchip rk3399 DFI device
+
+Required properties:
+- compatible: Must be "rockchip,rk3399-dfi".
+- reg: physical base address of each DFI and length of memory mapped region
+- rockchip,pmu: phandle to the syscon managing the "pmu general register files"
+- clocks: phandles for clock specified in "clock-names" property
+- clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon";
+
+Example:
+ dfi: dfi@0xff630000 {
+ compatible = "rockchip,rk3399-dfi";
+ reg = <0x00 0xff630000 0x00 0x4000>;
+ rockchip,pmu = <&pmugrf>;
+ clocks = <&cru PCLK_DDR_MON>;
+ clock-names = "pclk_ddr_mon";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
new file mode 100644
index 0000000..7a9e860
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
@@ -0,0 +1,209 @@
+* Rockchip rk3399 DMC(Dynamic Memory Controller) device
+
+Required properties:
+- compatible: Must be "rockchip,rk3399-dmc".
+- devfreq-events: Node to get DDR loading, Refer to
+ Documentation/devicetree/bindings/devfreq/
+ rockchip-dfi.txt
+- interrupts: The interrupt number to the CPU. The interrupt
+ specifier format depends on the interrupt controller.
+ It should be DCF interrupts, when DDR dvfs finish,
+ it will happen.
+- clocks: Phandles for clock specified in "clock-names" property
+- clock-names : The name of clock used by the DFI, must be
+ "pclk_ddr_mon";
+- operating-points-v2: Refer to Documentation/devicetree/bindings/power/opp.txt
+ for details.
+- center-supply: DMC supply node.
+- status: Marks the node enabled/disabled.
+
+Following properties are ddr timing:
+
+- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/ddr.h,
+ it select ddr3 cl-trp-trcd type, default value
+ "DDR3_DEFAULT".it must selected according to
+ "Speed Bin" in ddr3 datasheet, DO NOT use
+ smaller "Speed Bin" than ddr3 exactly is.
+
+- rockchip,pd_idle : Config the PD_IDLE value, defined the power-down
+ idle period, memories are places into power-down
+ mode if bus is idle for PD_IDLE DFI clocks.
+
+- rockchip,sr_idle : Configure the SR_IDLE value, defined the
+ selfrefresh idle period, memories are places
+ into self-refresh mode if bus is idle for
+ SR_IDLE*1024 DFI clocks (DFI clocks freq is
+ half of dram's clocks), defaule value is "0".
+
+- rockchip,sr_mc_gate_idle : Defined the self-refresh with memory and
+ controller clock gating idle period, memories
+ are places into self-refresh mode and memory
+ controller clock arg gating if bus is idle for
+ sr_mc_gate_idle*1024 DFI clocks.
+
+- rockchip,srpd_lite_idle : Defined the self-refresh power down idle
+ period, memories are places into self-refresh
+ power down mode if bus is idle for
+ srpd_lite_idle*1024 DFI clocks. This parameter
+ is for LPDDR4 only.
+
+- rockchip,standby_idle : Defined the standby idle period, memories are
+ places into self-refresh than controller, pi,
+ phy and dram clock will gating if bus is idle
+ for standby_idle * DFI clocks.
+
+- rockchip,dram_dll_disb_freq : It's defined the DDR3 dll bypass frequency in
+ MHz, when ddr freq less than DRAM_DLL_DISB_FREQ,
+ ddr3 dll will bypssed note: if dll was bypassed,
+ the odt also stop working.
+
+- rockchip,phy_dll_disb_freq : Defined the PHY dll bypass frequency in
+ MHz (Mega Hz), when ddr freq less than
+ DRAM_DLL_DISB_FREQ, phy dll will bypssed.
+ note: phy dll and phy odt are independent.
+
+- rockchip,ddr3_odt_disb_freq : When dram type is DDR3, this parameter defined
+ the odt disable frequency in MHz (Mega Hz),
+ when ddr frequency less then ddr3_odt_disb_freq,
+ the odt on dram side and controller side are
+ both disabled.
+
+- rockchip,ddr3_drv : When dram type is DDR3, this parameter define
+ the dram side driver stength in ohm, default
+ value is DDR3_DS_40ohm.
+
+- rockchip,ddr3_odt : When dram type is DDR3, this parameter define
+ the dram side ODT stength in ohm, default value
+ is DDR3_ODT_120ohm.
+
+- rockchip,phy_ddr3_ca_drv : When dram type is DDR3, this parameter define
+ the phy side CA line(incluing command line,
+ address line and clock line) driver strength.
+ Default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_ddr3_dq_drv : When dram type is DDR3, this parameter define
+ the phy side DQ line(incluing DQS/DQ/DM line)
+ driver strength. default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_ddr3_odt : When dram type is DDR3, this parameter define the
+ phy side odt strength, default value is
+ PHY_DRV_ODT_240.
+
+- rockchip,lpddr3_odt_disb_freq : When dram type is LPDDR3, this parameter defined
+ then odt disable frequency in MHz (Mega Hz),
+ when ddr frequency less then ddr3_odt_disb_freq,
+ the odt on dram side and controller side are
+ both disabled.
+
+- rockchip,lpddr3_drv : When dram type is LPDDR3, this parameter define
+ the dram side driver stength in ohm, default
+ value is LP3_DS_34ohm.
+
+- rockchip,lpddr3_odt : When dram type is LPDDR3, this parameter define
+ the dram side ODT stength in ohm, default value
+ is LP3_ODT_240ohm.
+
+- rockchip,phy_lpddr3_ca_drv : When dram type is LPDDR3, this parameter define
+ the phy side CA line(incluing command line,
+ address line and clock line) driver strength.
+ default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr3_dq_drv : When dram type is LPDDR3, this parameter define
+ the phy side DQ line(incluing DQS/DQ/DM line)
+ driver strength. default value is
+ PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define
+ the phy side odt strength, default value is
+ PHY_DRV_ODT_240.
+
+- rockchip,lpddr4_odt_disb_freq : When dram type is LPDDR4, this parameter
+ defined the odt disable frequency in
+ MHz (Mega Hz), when ddr frequency less then
+ ddr3_odt_disb_freq, the odt on dram side and
+ controller side are both disabled.
+
+- rockchip,lpddr4_drv : When dram type is LPDDR4, this parameter define
+ the dram side driver stength in ohm, default
+ value is LP4_PDDS_60ohm.
+
+- rockchip,lpddr4_dq_odt : When dram type is LPDDR4, this parameter define
+ the dram side ODT on dqs/dq line stength in ohm,
+ default value is LP4_DQ_ODT_40ohm.
+
+- rockchip,lpddr4_ca_odt : When dram type is LPDDR4, this parameter define
+ the dram side ODT on ca line stength in ohm,
+ default value is LP4_CA_ODT_40ohm.
+
+- rockchip,phy_lpddr4_ca_drv : When dram type is LPDDR4, this parameter define
+ the phy side CA line(incluing command address
+ line) driver strength. default value is
+ PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr4_ck_cs_drv : When dram type is LPDDR4, this parameter define
+ the phy side clock line and cs line driver
+ strength. default value is PHY_DRV_ODT_80.
+
+- rockchip,phy_lpddr4_dq_drv : When dram type is LPDDR4, this parameter define
+ the phy side DQ line(incluing DQS/DQ/DM line)
+ driver strength. default value is PHY_DRV_ODT_80.
+
+- rockchip,phy_lpddr4_odt : When dram type is LPDDR4, this parameter define
+ the phy side odt strength, default value is
+ PHY_DRV_ODT_60.
+
+Example:
+ dmc_opp_table: dmc_opp_table {
+ compatible = "operating-points-v2";
+
+ opp00 {
+ opp-hz = /bits/ 64 <300000000>;
+ opp-microvolt = <900000>;
+ };
+ opp01 {
+ opp-hz = /bits/ 64 <666000000>;
+ opp-microvolt = <900000>;
+ };
+ };
+
+ dmc: dmc {
+ compatible = "rockchip,rk3399-dmc";
+ devfreq-events = <&dfi>;
+ interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_DDRCLK>;
+ clock-names = "dmc_clk";
+ operating-points-v2 = <&dmc_opp_table>;
+ center-supply = <&ppvar_centerlogic>;
+ upthreshold = <15>;
+ downdifferential = <10>;
+ rockchip,ddr3_speed_bin = <21>;
+ rockchip,pd_idle = <0x40>;
+ rockchip,sr_idle = <0x2>;
+ rockchip,sr_mc_gate_idle = <0x3>;
+ rockchip,srpd_lite_idle = <0x4>;
+ rockchip,standby_idle = <0x2000>;
+ rockchip,dram_dll_dis_freq = <300>;
+ rockchip,phy_dll_dis_freq = <125>;
+ rockchip,auto_pd_dis_freq = <666>;
+ rockchip,ddr3_odt_dis_freq = <333>;
+ rockchip,ddr3_drv = <DDR3_DS_40ohm>;
+ rockchip,ddr3_odt = <DDR3_ODT_120ohm>;
+ rockchip,phy_ddr3_ca_drv = <PHY_DRV_ODT_40>;
+ rockchip,phy_ddr3_dq_drv = <PHY_DRV_ODT_40>;
+ rockchip,phy_ddr3_odt = <PHY_DRV_ODT_240>;
+ rockchip,lpddr3_odt_dis_freq = <333>;
+ rockchip,lpddr3_drv = <LP3_DS_34ohm>;
+ rockchip,lpddr3_odt = <LP3_ODT_240ohm>;
+ rockchip,phy_lpddr3_ca_drv = <PHY_DRV_ODT_40>;
+ rockchip,phy_lpddr3_dq_drv = <PHY_DRV_ODT_40>;
+ rockchip,phy_lpddr3_odt = <PHY_DRV_ODT_240>;
+ rockchip,lpddr4_odt_dis_freq = <333>;
+ rockchip,lpddr4_drv = <LP4_PDDS_60ohm>;
+ rockchip,lpddr4_dq_odt = <LP4_DQ_ODT_40ohm>;
+ rockchip,lpddr4_ca_odt = <LP4_CA_ODT_40ohm>;
+ rockchip,phy_lpddr4_ca_drv = <PHY_DRV_ODT_40>;
+ rockchip,phy_lpddr4_ck_cs_drv = <PHY_DRV_ODT_80>;
+ rockchip,phy_lpddr4_dq_drv = <PHY_DRV_ODT_80>;
+ rockchip,phy_lpddr4_odt = <PHY_DRV_ODT_60>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
index 1112e0d..820fee4 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
@@ -13,6 +13,7 @@
- touchscreen-size-y : See touchscreen.txt
Optional properties:
+- firmware-name : File basename (string) for board specific firmware
- touchscreen-inverted-x : See touchscreen.txt
- touchscreen-inverted-y : See touchscreen.txt
- touchscreen-swapped-x-y : See touchscreen.txt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt
new file mode 100644
index 0000000..ee2ad36
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt
@@ -0,0 +1,26 @@
+J-Core Advanced Interrupt Controller
+
+Required properties:
+
+- compatible: Should be "jcore,aic1" for the (obsolete) first-generation aic
+ with 8 interrupt lines with programmable priorities, or "jcore,aic2" for
+ the "aic2" core with 64 interrupts.
+
+- reg: Memory region(s) for configuration. For SMP, there should be one
+ region per cpu, indexed by the sequential, zero-based hardware cpu
+ number.
+
+- interrupt-controller: Identifies the node as an interrupt controller
+
+- #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+
+
+Example:
+
+aic: interrupt-controller@200 {
+ compatible = "jcore,aic2";
+ reg = < 0x200 0x30 0x500 0x30 >;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt
new file mode 100644
index 0000000..86a7b4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt
@@ -0,0 +1,25 @@
+Marvell Armada 7K/8K PIC Interrupt controller
+---------------------------------------------
+
+This is the Device Tree binding for the PIC, a secondary interrupt
+controller available on the Marvell Armada 7K/8K ARM64 SoCs, and
+typically connected to the GIC as the primary interrupt controller.
+
+Required properties:
+- compatible: should be "marvell,armada-8k-pic"
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: the number of cells to define interrupts on this
+ controller. Should be 1
+- reg: the register area for the PIC interrupt controller
+- interrupts: the interrupt to the primary interrupt controller,
+ typically the GIC
+
+Example:
+
+ pic: interrupt-controller@3f0100 {
+ compatible = "marvell,armada-8k-pic";
+ reg = <0x3f0100 0x10>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
index 8af0a8e..3f6442c 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
@@ -31,7 +31,7 @@
Example:
odmi: odmi@300000 {
- compatible = "marvell,ap806-odm-controller",
+ compatible = "marvell,ap806-odmi-controller",
"marvell,odmi-controller";
interrupt-controller;
msi-controller;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
new file mode 100644
index 0000000..6e7703d
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
@@ -0,0 +1,20 @@
+STM32 External Interrupt Controller
+
+Required properties:
+
+- compatible: Should be "st,stm32-exti"
+- reg: Specifies base physical address and size of the registers
+- interrupt-controller: Indentifies the node as an interrupt controller
+- #interrupt-cells: Specifies the number of cells to encode an interrupt
+ specifier, shall be 2
+- interrupts: interrupts references to primary interrupt controller
+
+Example:
+
+exti: interrupt-controller@40013c00 {
+ compatible = "st,stm32-exti";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x40013C00 0x400>;
+ interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
+};
diff --git a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
index 8f86ab3..94aeeea 100644
--- a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
@@ -1,11 +1,20 @@
= Rockchip eFuse device tree bindings =
Required properties:
-- compatible: Should be "rockchip,rockchip-efuse"
+- compatible: Should be one of the following.
+ - "rockchip,rk3066a-efuse" - for RK3066a SoCs.
+ - "rockchip,rk3188-efuse" - for RK3188 SoCs.
+ - "rockchip,rk3288-efuse" - for RK3288 SoCs.
+ - "rockchip,rk3399-efuse" - for RK3399 SoCs.
- reg: Should contain the registers location and exact eFuse size
- clocks: Should be the clock id of eFuse
- clock-names: Should be "pclk_efuse"
+Deprecated properties:
+- compatible: "rockchip,rockchip-efuse"
+ Old efuse compatible value compatible to rk3066a, rk3188 and rk3288
+ efuses
+
= Data cells =
Are child nodes of eFuse, bindings of which as described in
bindings/nvmem/nvmem.txt
@@ -13,7 +22,7 @@
Example:
efuse: efuse@ffb40000 {
- compatible = "rockchip,rockchip-efuse";
+ compatible = "rockchip,rk3288-efuse";
reg = <0xffb40000 0x20>;
#address-cells = <1>;
#size-cells = <1>;
diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
index 936ab5b..f86bb06 100644
--- a/Documentation/devicetree/bindings/serial/8250.txt
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -42,6 +42,8 @@
- auto-flow-control: one way to enable automatic flow control support. The
driver is allowed to detect support for the capability even without this
property.
+- tx-threshold: Specify the TX FIFO low water indication for parts with
+ programmable TX FIFO thresholds.
Note:
* fsl,ns16550:
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
new file mode 100644
index 0000000..85ec5f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
@@ -0,0 +1,46 @@
+* STMicroelectronics STM32 USART
+
+Required properties:
+- compatible: Can be either "st,stm32-usart", "st,stm32-uart",
+"st,stm32f7-usart" or "st,stm32f7-uart" depending on whether
+the device supports synchronous mode and is compatible with
+stm32(f4) or stm32f7.
+- reg: The address and length of the peripheral registers space
+- interrupts: The interrupt line of the USART instance
+- clocks: The input clock of the USART instance
+
+Optional properties:
+- pinctrl: The reference on the pins configuration
+- st,hw-flow-ctrl: bool flag to enable hardware flow control.
+- dmas: phandle(s) to DMA controller node(s). Refer to stm32-dma.txt
+- dma-names: "rx" and/or "tx"
+
+Examples:
+usart4: serial@40004c00 {
+ compatible = "st,stm32-uart";
+ reg = <0x40004c00 0x400>;
+ interrupts = <52>;
+ clocks = <&clk_pclk1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usart4>;
+};
+
+usart2: serial@40004400 {
+ compatible = "st,stm32-usart", "st,stm32-uart";
+ reg = <0x40004400 0x400>;
+ interrupts = <38>;
+ clocks = <&clk_pclk1>;
+ st,hw-flow-ctrl;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usart2 &pinctrl_usart2_rtscts>;
+};
+
+usart1: serial@40011000 {
+ compatible = "st,stm32-usart", "st,stm32-uart";
+ reg = <0x40011000 0x400>;
+ interrupts = <37>;
+ clocks = <&rcc 0 164>;
+ dmas = <&dma2 2 4 0x414 0x0>,
+ <&dma2 7 4 0x414 0x0>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
index da2d510..e207c11 100644
--- a/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
+++ b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
@@ -2,7 +2,9 @@
Required properties:
-- compatible : Must be "moxa,moxart-timer"
+- compatible : Must be one of:
+ - "moxa,moxart-timer"
+ - "aspeed,ast2400-timer"
- reg : Should contain registers location and length
- interrupts : Should contain the timer interrupt number
- clocks : Should contain phandle for the clock that drives the counter
diff --git a/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
index 3ca89cd..d191612 100644
--- a/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
+++ b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
@@ -2,7 +2,7 @@
================================================
Required properties:
-- compatible: Should be "oxsemi,ox810se-rps-timer"
+- compatible: Should be "oxsemi,ox810se-rps-timer" or "oxsemi,ox820-rps-timer"
- reg : Specifies base physical address and size of the registers.
- interrupts : The interrupts of the two timers
- clocks : The phandle of the timer clock source
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index edb7d5d..01085cd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -698,6 +698,15 @@
loops can be debugged more effectively on production
systems.
+ clocksource.arm_arch_timer.fsl-a008585=
+ [ARM64]
+ Format: <bool>
+ Enable/disable the workaround of Freescale/NXP
+ erratum A-008585. This can be useful for KVM
+ guests, if the guest device tree doesn't show the
+ erratum. If unspecified, the workaround is
+ enabled based on the device tree.
+
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
@@ -1045,11 +1054,12 @@
determined by the stdout-path property in device
tree's chosen node.
- cdns,<addr>
- Start an early, polled-mode console on a cadence serial
- port at the specified address. The cadence serial port
- must already be setup and configured. Options are not
- yet supported.
+ cdns,<addr>[,options]
+ Start an early, polled-mode console on a Cadence
+ (xuartps) serial port at the specified address. Only
+ supported option is baud rate. If baud rate is not
+ specified, the serial port must already be setup and
+ configured.
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
diff --git a/Documentation/ko_KR/memory-barriers.txt b/Documentation/ko_KR/memory-barriers.txt
new file mode 100644
index 0000000..34d3d38
--- /dev/null
+++ b/Documentation/ko_KR/memory-barriers.txt
@@ -0,0 +1,3135 @@
+NOTE:
+This is a version of Documentation/memory-barriers.txt translated into Korean.
+This document is maintained by SeongJae Park <sj38.park@gmail.com>.
+If you find any difference between this document and the original file or
+a problem with the translation, please contact the maintainer of this file.
+
+Please also note that the purpose of this file is to be easier to
+read for non English (read: Korean) speakers and is not intended as
+a fork. So if you have any comments or updates for this file please
+update the original English file first. The English version is
+definitive, and readers should look there if they have any doubt.
+
+===================================
+이 문서는
+Documentation/memory-barriers.txt
+의 한글 번역입니다.
+
+역자: 박성재 <sj38.park@gmail.com>
+===================================
+
+
+ =========================
+ 리눅스 커널 메모리 배리어
+ =========================
+
+저자: David Howells <dhowells@redhat.com>
+ Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Will Deacon <will.deacon@arm.com>
+ Peter Zijlstra <peterz@infradead.org>
+
+========
+면책조항
+========
+
+이 문서는 명세서가 아닙니다; 이 문서는 완벽하지 않은데, 간결성을 위해 의도된
+부분도 있고, 의도하진 않았지만 사람에 의해 쓰였다보니 불완전한 부분도 있습니다.
+이 문서는 리눅스에서 제공하는 다양한 메모리 배리어들을 사용하기 위한
+안내서입니다만, 뭔가 이상하다 싶으면 (그런게 많을 겁니다) 질문을 부탁드립니다.
+
+다시 말하지만, 이 문서는 리눅스가 하드웨어에 기대하는 사항에 대한 명세서가
+아닙니다.
+
+이 문서의 목적은 두가지입니다:
+
+ (1) 어떤 특정 배리어에 대해 기대할 수 있는 최소한의 기능을 명세하기 위해서,
+ 그리고
+
+ (2) 사용 가능한 배리어들에 대해 어떻게 사용해야 하는지에 대한 안내를 제공하기
+ 위해서.
+
+어떤 아키텍쳐는 특정한 배리어들에 대해서는 여기서 이야기하는 최소한의
+요구사항들보다 많은 기능을 제공할 수도 있습니다만, 여기서 이야기하는
+요구사항들을 충족하지 않는 아키텍쳐가 있다면 그 아키텍쳐가 잘못된 것이란 점을
+알아두시기 바랍니다.
+
+또한, 특정 아키텍쳐에서 일부 배리어는 해당 아키텍쳐의 특수한 동작 방식으로 인해
+해당 배리어의 명시적 사용이 불필요해서 no-op 이 될수도 있음을 알아두시기
+바랍니다.
+
+역자: 본 번역 역시 완벽하지 않은데, 이 역시 부분적으로는 의도된 것이기도
+합니다. 여타 기술 문서들이 그렇듯 완벽한 이해를 위해서는 번역문과 원문을 함께
+읽으시되 번역문을 하나의 가이드로 활용하시길 추천드리며, 발견되는 오역 등에
+대해서는 언제든 의견을 부탁드립니다. 과한 번역으로 인한 오해를 최소화하기 위해
+애매한 부분이 있을 경우에는 어색함이 있더라도 원래의 용어를 차용합니다.
+
+
+=====
+목차:
+=====
+
+ (*) 추상 메모리 액세스 모델.
+
+ - 디바이스 오퍼레이션.
+ - 보장사항.
+
+ (*) 메모리 배리어란 무엇인가?
+
+ - 메모리 배리어의 종류.
+ - 메모리 배리어에 대해 가정해선 안될 것.
+ - 데이터 의존성 배리어.
+ - 컨트롤 의존성.
+ - SMP 배리어 짝맞추기.
+ - 메모리 배리어 시퀀스의 예.
+ - 읽기 메모리 배리어 vs 로드 예측.
+ - 이행성
+
+ (*) 명시적 커널 배리어.
+
+ - 컴파일러 배리어.
+ - CPU 메모리 배리어.
+ - MMIO 쓰기 배리어.
+
+ (*) 암묵적 커널 메모리 배리어.
+
+ - 락 Acquisition 함수.
+ - 인터럽트 비활성화 함수.
+ - 슬립과 웨이크업 함수.
+ - 그외의 함수들.
+
+ (*) CPU 간 ACQUIRING 배리어의 효과.
+
+ - Acquire vs 메모리 액세스.
+ - Acquire vs I/O 액세스.
+
+ (*) 메모리 배리어가 필요한 곳
+
+ - 프로세서간 상호 작용.
+ - 어토믹 오퍼레이션.
+ - 디바이스 액세스.
+ - 인터럽트.
+
+ (*) 커널 I/O 배리어의 효과.
+
+ (*) 가정되는 가장 완화된 실행 순서 모델.
+
+ (*) CPU 캐시의 영향.
+
+ - 캐시 일관성.
+ - 캐시 일관성 vs DMA.
+ - 캐시 일관성 vs MMIO.
+
+ (*) CPU 들이 저지르는 일들.
+
+ - 그리고, Alpha 가 있다.
+ - 가상 머신 게스트.
+
+ (*) 사용 예.
+
+ - 순환식 버퍼.
+
+ (*) 참고 문헌.
+
+
+=======================
+추상 메모리 액세스 모델
+=======================
+
+다음과 같이 추상화된 시스템 모델을 생각해 봅시다:
+
+ : :
+ : :
+ : :
+ +-------+ : +--------+ : +-------+
+ | | : | | : | |
+ | | : | | : | |
+ | CPU 1 |<----->| Memory |<----->| CPU 2 |
+ | | : | | : | |
+ | | : | | : | |
+ +-------+ : +--------+ : +-------+
+ ^ : ^ : ^
+ | : | : |
+ | : | : |
+ | : v : |
+ | : +--------+ : |
+ | : | | : |
+ | : | | : |
+ +---------->| Device |<----------+
+ : | | :
+ : | | :
+ : +--------+ :
+ : :
+
+프로그램은 여러 메모리 액세스 오퍼레이션을 발생시키고, 각각의 CPU 는 그런
+프로그램들을 실행합니다. 추상화된 CPU 모델에서 메모리 오퍼레이션들의 순서는
+매우 완화되어 있고, CPU 는 프로그램이 인과관계를 어기지 않는 상태로 관리된다고
+보일 수만 있다면 메모리 오퍼레이션을 자신이 원하는 어떤 순서대로든 재배치해
+동작시킬 수 있습니다. 비슷하게, 컴파일러 또한 프로그램의 정상적 동작을 해치지
+않는 한도 내에서는 어떤 순서로든 자신이 원하는 대로 인스트럭션을 재배치 할 수
+있습니다.
+
+따라서 위의 다이어그램에서 한 CPU가 동작시키는 메모리 오퍼레이션이 만들어내는
+변화는 해당 오퍼레이션이 CPU 와 시스템의 다른 부분들 사이의 인터페이스(점선)를
+지나가면서 시스템의 나머지 부분들에 인지됩니다.
+
+
+예를 들어, 다음의 일련의 이벤트들을 생각해 봅시다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1; B == 2 }
+ A = 3; x = B;
+ B = 4; y = A;
+
+다이어그램의 가운데에 위치한 메모리 시스템에 보여지게 되는 액세스들은 다음의 총
+24개의 조합으로 재구성될 수 있습니다:
+
+ STORE A=3, STORE B=4, y=LOAD A->3, x=LOAD B->4
+ STORE A=3, STORE B=4, x=LOAD B->4, y=LOAD A->3
+ STORE A=3, y=LOAD A->3, STORE B=4, x=LOAD B->4
+ STORE A=3, y=LOAD A->3, x=LOAD B->2, STORE B=4
+ STORE A=3, x=LOAD B->2, STORE B=4, y=LOAD A->3
+ STORE A=3, x=LOAD B->2, y=LOAD A->3, STORE B=4
+ STORE B=4, STORE A=3, y=LOAD A->3, x=LOAD B->4
+ STORE B=4, ...
+ ...
+
+따라서 다음의 네가지 조합의 값들이 나올 수 있습니다:
+
+ x == 2, y == 1
+ x == 2, y == 3
+ x == 4, y == 1
+ x == 4, y == 3
+
+
+한발 더 나아가서, 한 CPU 가 메모리 시스템에 반영한 스토어 오퍼레이션들의 결과는
+다른 CPU 에서의 로드 오퍼레이션을 통해 인지되는데, 이 때 스토어가 반영된 순서와
+다른 순서로 인지될 수도 있습니다.
+
+
+예로, 아래의 일련의 이벤트들을 생각해 봅시다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
+ B = 4; Q = P;
+ P = &B D = *Q;
+
+D 로 읽혀지는 값은 CPU 2 에서 P 로부터 읽혀진 주소값에 의존적이기 때문에 여기엔
+분명한 데이터 의존성이 있습니다. 하지만 이 이벤트들의 실행 결과로는 아래의
+결과들이 모두 나타날 수 있습니다:
+
+ (Q == &A) and (D == 1)
+ (Q == &B) and (D == 2)
+ (Q == &B) and (D == 4)
+
+CPU 2 는 *Q 의 로드를 요청하기 전에 P 를 Q 에 넣기 때문에 D 에 C 를 집어넣는
+일은 없음을 알아두세요.
+
+
+디바이스 오퍼레이션
+-------------------
+
+일부 디바이스는 자신의 컨트롤 인터페이스를 메모리의 특정 영역으로 매핑해서
+제공하는데(Memory mapped I/O), 해당 컨트롤 레지스터에 접근하는 순서는 매우
+중요합니다. 예를 들어, 어드레스 포트 레지스터 (A) 와 데이터 포트 레지스터 (D)
+를 통해 접근되는 내부 레지스터 집합을 갖는 이더넷 카드를 생각해 봅시다. 내부의
+5번 레지스터를 읽기 위해 다음의 코드가 사용될 수 있습니다:
+
+ *A = 5;
+ x = *D;
+
+하지만, 이건 다음의 두 조합 중 하나로 만들어질 수 있습니다:
+
+ STORE *A = 5, x = LOAD *D
+ x = LOAD *D, STORE *A = 5
+
+두번째 조합은 데이터를 읽어온 _후에_ 주소를 설정하므로, 오동작을 일으킬 겁니다.
+
+
+보장사항
+--------
+
+CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니다:
+
+ (*) 어떤 CPU 든, 의존성이 존재하는 메모리 액세스들은 해당 CPU 자신에게
+ 있어서는 순서대로 메모리 시스템에 수행 요청됩니다. 즉, 다음에 대해서:
+
+ Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+
+ CPU 는 다음과 같은 메모리 오퍼레이션 시퀀스를 수행 요청합니다:
+
+ Q = LOAD P, D = LOAD *Q
+
+ 그리고 그 시퀀스 내에서의 순서는 항상 지켜집니다. 대부분의 시스템에서
+ smp_read_barrier_depends() 는 아무일도 안하지만 DEC Alpha 에서는
+ 명시적으로 사용되어야 합니다. 보통의 경우에는 smp_read_barrier_depends()
+ 를 직접 사용하는 대신 rcu_dereference() 같은 것들을 사용해야 함을
+ 알아두세요.
+
+ (*) 특정 CPU 내에서 겹치는 영역의 메모리에 행해지는 로드와 스토어 들은 해당
+ CPU 안에서는 순서가 바뀌지 않은 것으로 보여집니다. 즉, 다음에 대해서:
+
+ a = READ_ONCE(*X); WRITE_ONCE(*X, b);
+
+ CPU 는 다음의 메모리 오퍼레이션 시퀀스만을 메모리에 요청할 겁니다:
+
+ a = LOAD *X, STORE *X = b
+
+ 그리고 다음에 대해서는:
+
+ WRITE_ONCE(*X, c); d = READ_ONCE(*X);
+
+ CPU 는 다음의 수행 요청만을 만들어 냅니다:
+
+ STORE *X = c, d = LOAD *X
+
+ (로드 오퍼레이션과 스토어 오퍼레이션이 겹치는 메모리 영역에 대해
+ 수행된다면 해당 오퍼레이션들은 겹친다고 표현됩니다).
+
+그리고 _반드시_ 또는 _절대로_ 가정하거나 가정하지 말아야 하는 것들이 있습니다:
+
+ (*) 컴파일러가 READ_ONCE() 나 WRITE_ONCE() 로 보호되지 않은 메모리 액세스를
+ 당신이 원하는 대로 할 것이라는 가정은 _절대로_ 해선 안됩니다. 그것들이
+ 없다면, 컴파일러는 컴파일러 배리어 섹션에서 다루게 될, 모든 "창의적인"
+ 변경들을 만들어낼 권한을 갖게 됩니다.
+
+ (*) 개별적인 로드와 스토어들이 주어진 순서대로 요청될 것이라는 가정은 _절대로_
+ 하지 말아야 합니다. 이 말은 곧:
+
+ X = *A; Y = *B; *D = Z;
+
+ 는 다음의 것들 중 어느 것으로든 만들어질 수 있다는 의미입니다:
+
+ X = LOAD *A, Y = LOAD *B, STORE *D = Z
+ X = LOAD *A, STORE *D = Z, Y = LOAD *B
+ Y = LOAD *B, X = LOAD *A, STORE *D = Z
+ Y = LOAD *B, STORE *D = Z, X = LOAD *A
+ STORE *D = Z, X = LOAD *A, Y = LOAD *B
+ STORE *D = Z, Y = LOAD *B, X = LOAD *A
+
+ (*) 겹치는 메모리 액세스들은 합쳐지거나 버려질 수 있음을 _반드시_ 가정해야
+ 합니다. 다음의 코드는:
+
+ X = *A; Y = *(A + 4);
+
+ 다음의 것들 중 뭐든 될 수 있습니다:
+
+ X = LOAD *A; Y = LOAD *(A + 4);
+ Y = LOAD *(A + 4); X = LOAD *A;
+ {X, Y} = LOAD {*A, *(A + 4) };
+
+ 그리고:
+
+ *A = X; *(A + 4) = Y;
+
+ 는 다음 중 뭐든 될 수 있습니다:
+
+ STORE *A = X; STORE *(A + 4) = Y;
+ STORE *(A + 4) = Y; STORE *A = X;
+ STORE {*A, *(A + 4) } = {X, Y};
+
+그리고 보장사항에 반대되는 것들(anti-guarantees)이 있습니다:
+
+ (*) 이 보장사항들은 bitfield 에는 적용되지 않는데, 컴파일러들은 bitfield 를
+ 수정하는 코드를 생성할 때 원자성 없는(non-atomic) 읽고-수정하고-쓰는
+ 인스트럭션들의 조합을 만드는 경우가 많기 때문입니다. 병렬 알고리즘의
+ 동기화에 bitfield 를 사용하려 하지 마십시오.
+
+ (*) bitfield 들이 여러 락으로 보호되는 경우라 하더라도, 하나의 bitfield 의
+ 모든 필드들은 하나의 락으로 보호되어야 합니다. 만약 한 bitfield 의 두
+ 필드가 서로 다른 락으로 보호된다면, 컴파일러의 원자성 없는
+ 읽고-수정하고-쓰는 인스트럭션 조합은 한 필드에의 업데이트가 근처의
+ 필드에도 영향을 끼치게 할 수 있습니다.
+
+ (*) 이 보장사항들은 적절하게 정렬되고 크기가 잡힌 스칼라 변수들에 대해서만
+ 적용됩니다. "적절하게 크기가 잡힌" 이라함은 현재로써는 "char", "short",
+ "int" 그리고 "long" 과 같은 크기의 변수들을 의미합니다. "적절하게 정렬된"
+ 은 자연스런 정렬을 의미하는데, 따라서 "char" 에 대해서는 아무 제약이 없고,
+ "short" 에 대해서는 2바이트 정렬을, "int" 에는 4바이트 정렬을, 그리고
+ "long" 에 대해서는 32-bit 시스템인지 64-bit 시스템인지에 따라 4바이트 또는
+ 8바이트 정렬을 의미합니다. 이 보장사항들은 C11 표준에서 소개되었으므로,
+ C11 전의 오래된 컴파일러(예를 들어, gcc 4.6) 를 사용할 때엔 주의하시기
+ 바랍니다. 표준에 이 보장사항들은 "memory location" 을 정의하는 3.14
+ 섹션에 다음과 같이 설명되어 있습니다:
+ (역자: 인용문이므로 번역하지 않습니다)
+
+ memory location
+ either an object of scalar type, or a maximal sequence
+ of adjacent bit-fields all having nonzero width
+
+ NOTE 1: Two threads of execution can update and access
+ separate memory locations without interfering with
+ each other.
+
+ NOTE 2: A bit-field and an adjacent non-bit-field member
+ are in separate memory locations. The same applies
+ to two bit-fields, if one is declared inside a nested
+ structure declaration and the other is not, or if the two
+ are separated by a zero-length bit-field declaration,
+ or if they are separated by a non-bit-field member
+ declaration. It is not safe to concurrently update two
+ bit-fields in the same structure if all members declared
+ between them are also bit-fields, no matter what the
+ sizes of those intervening bit-fields happen to be.
+
+
+=========================
+메모리 배리어란 무엇인가?
+=========================
+
+앞에서 봤듯이, 상호간 의존성이 없는 메모리 오퍼레이션들은 실제로는 무작위적
+순서로 수행될 수 있으며, 이는 CPU 와 CPU 간의 상호작용이나 I/O 에 문제가 될 수
+있습니다. 따라서 컴파일러와 CPU 가 순서를 바꾸는데 제약을 걸 수 있도록 개입할
+수 있는 어떤 방법이 필요합니다.
+
+메모리 배리어는 그런 개입 수단입니다. 메모리 배리어는 배리어를 사이에 둔 앞과
+뒤 양측의 메모리 오퍼레이션들 간에 부분적 순서가 존재하도록 하는 효과를 줍니다.
+
+시스템의 CPU 들과 여러 디바이스들은 성능을 올리기 위해 명령어 재배치, 실행
+유예, 메모리 오퍼레이션들의 조합, 예측적 로드(speculative load), 브랜치
+예측(speculative branch prediction), 다양한 종류의 캐싱(caching) 등의 다양한
+트릭을 사용할 수 있기 때문에 이런 강제력은 중요합니다. 메모리 배리어들은 이런
+트릭들을 무효로 하거나 억제하는 목적으로 사용되어져서 코드가 여러 CPU 와
+디바이스들 간의 상호작용을 정상적으로 제어할 수 있게 해줍니다.
+
+
+메모리 배리어의 종류
+--------------------
+
+메모리 배리어는 네개의 기본 타입으로 분류됩니다:
+
+ (1) 쓰기 (또는 스토어) 메모리 배리어.
+
+ 쓰기 메모리 배리어는 시스템의 다른 컴포넌트들에 해당 배리어보다 앞서
+ 명시된 모든 STORE 오퍼레이션들이 해당 배리어 뒤에 명시된 모든 STORE
+ 오퍼레이션들보다 먼저 수행된 것으로 보일 것을 보장합니다.
+
+ 쓰기 배리어는 스토어 오퍼레이션들에 대한 부분적 순서 세우기입니다; 로드
+ 오퍼레이션들에 대해서는 어떤 영향도 끼치지 않습니다.
+
+ CPU 는 시간의 흐름에 따라 메모리 시스템에 일련의 스토어 오퍼레이션들을
+ 하나씩 요청해 집어넣습니다. 쓰기 배리어 앞의 모든 스토어 오퍼레이션들은
+ 쓰기 배리어 뒤의 모든 스토어 오퍼레이션들보다 _앞서_ 수행될 겁니다.
+
+ [!] 쓰기 배리어들은 읽기 또는 데이터 의존성 배리어와 함께 짝을 맞춰
+ 사용되어야만 함을 알아두세요; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (2) 데이터 의존성 배리어.
+
+ 데이터 의존성 배리어는 읽기 배리어의 보다 완화된 형태입니다. 두개의 로드
+ 오퍼레이션이 있고 두번째 것이 첫번째 것의 결과에 의존하고 있을 때(예:
+ 두번째 로드가 참조할 주소를 첫번째 로드가 읽는 경우), 두번째 로드가 읽어올
+ 데이터는 첫번째 로드에 의해 그 주소가 얻어지기 전에 업데이트 되어 있음을
+ 보장하기 위해서 데이터 의존성 배리어가 필요할 수 있습니다.
+
+ 데이터 의존성 배리어는 상호 의존적인 로드 오퍼레이션들 사이의 부분적 순서
+ 세우기입니다; 스토어 오퍼레이션들이나 독립적인 로드들, 또는 중복되는
+ 로드들에 대해서는 어떤 영향도 끼치지 않습니다.
+
+ (1) 에서 언급했듯이, 시스템의 CPU 들은 메모리 시스템에 일련의 스토어
+ 오퍼레이션들을 던져 넣고 있으며, 거기에 관심이 있는 다른 CPU 는 그
+ 오퍼레이션들을 메모리 시스템이 실행한 결과를 인지할 수 있습니다. 이처럼
+ 다른 CPU 의 스토어 오퍼레이션의 결과에 관심을 두고 있는 CPU 가 수행 요청한
+ 데이터 의존성 배리어는, 배리어 앞의 어떤 로드 오퍼레이션이 다른 CPU 에서
+ 던져 넣은 스토어 오퍼레이션과 같은 영역을 향했다면, 그런 스토어
+ 오퍼레이션들이 만들어내는 결과가 데이터 의존성 배리어 뒤의 로드
+ 오퍼레이션들에게는 보일 것을 보장합니다.
+
+ 이 순서 세우기 제약에 대한 그림을 보기 위해선 "메모리 배리어 시퀀스의 예"
+ 서브섹션을 참고하시기 바랍니다.
+
+ [!] 첫번째 로드는 반드시 _데이터_ 의존성을 가져야지 컨트롤 의존성을 가져야
+ 하는게 아님을 알아두십시오. 만약 두번째 로드를 위한 주소가 첫번째 로드에
+ 의존적이지만 그 의존성은 조건적이지 그 주소 자체를 가져오는게 아니라면,
+ 그것은 _컨트롤_ 의존성이고, 이 경우에는 읽기 배리어나 그보다 강력한
+ 무언가가 필요합니다. 더 자세한 내용을 위해서는 "컨트롤 의존성" 서브섹션을
+ 참고하시기 바랍니다.
+
+ [!] 데이터 의존성 배리어는 보통 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
+ 합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (3) 읽기 (또는 로드) 메모리 배리어.
+
+ 읽기 배리어는 데이터 의존성 배리어 기능의 보장사항에 더해서 배리어보다
+ 앞서 명시된 모든 LOAD 오퍼레이션들이 배리어 뒤에 명시되는 모든 LOAD
+ 오퍼레이션들보다 먼저 행해진 것으로 시스템의 다른 컴포넌트들에 보여질 것을
+ 보장합니다.
+
+ 읽기 배리어는 로드 오퍼레이션에 행해지는 부분적 순서 세우기입니다; 스토어
+ 오퍼레이션에 대해서는 어떤 영향도 끼치지 않습니다.
+
+ 읽기 메모리 배리어는 데이터 의존성 배리어를 내장하므로 데이터 의존성
+ 배리어를 대신할 수 있습니다.
+
+ [!] 읽기 배리어는 일반적으로 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
+ 합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (4) 범용 메모리 배리어.
+
+ 범용(general) 메모리 배리어는 배리어보다 앞서 명시된 모든 LOAD 와 STORE
+ 오퍼레이션들이 배리어 뒤에 명시된 모든 LOAD 와 STORE 오퍼레이션들보다
+ 먼저 수행된 것으로 시스템의 나머지 컴포넌트들에 보이게 됨을 보장합니다.
+
+ 범용 메모리 배리어는 로드와 스토어 모두에 대한 부분적 순서 세우기입니다.
+
+ 범용 메모리 배리어는 읽기 메모리 배리어, 쓰기 메모리 배리어 모두를
+ 내장하므로, 두 배리어를 모두 대신할 수 있습니다.
+
+
+그리고 두개의 명시적이지 않은 타입이 있습니다:
+
+ (5) ACQUIRE 오퍼레이션.
+
+ 이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE
+ 오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에
+ 일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다.
+ LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도
+ ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤
+ 의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을
+ 충족시킵니다.
+
+ ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에
+ 수행된 것처럼 보일 수 있습니다.
+
+ ACQUIRE 오퍼레이션은 거의 항상 RELEASE 오퍼레이션과 짝을 지어 사용되어야
+ 합니다.
+
+
+ (6) RELEASE 오퍼레이션.
+
+ 이 타입의 오퍼레이션들도 단방향 투과성 배리어처럼 동작합니다. RELEASE
+ 오퍼레이션 앞의 모든 메모리 오퍼레이션들은 RELEASE 오퍼레이션 전에 완료된
+ 것으로 시스템의 다른 컴포넌트들에 보여질 것이 보장됩니다. UNLOCK 류의
+ 오퍼레이션들과 smp_store_release() 오퍼레이션도 RELEASE 오퍼레이션의
+ 일종입니다.
+
+ RELEASE 오퍼레이션 뒤의 메모리 오퍼레이션들은 RELEASE 오퍼레이션이
+ 완료되기 전에 행해진 것처럼 보일 수 있습니다.
+
+ ACQUIRE 와 RELEASE 오퍼레이션의 사용은 일반적으로 다른 메모리 배리어의
+ 필요성을 없앱니다 (하지만 "MMIO 쓰기 배리어" 서브섹션에서 설명되는 예외를
+ 알아두세요). 또한, RELEASE+ACQUIRE 조합은 범용 메모리 배리어처럼 동작할
+ 것을 보장하지 -않습니다-. 하지만, 어떤 변수에 대한 RELEASE 오퍼레이션을
+ 앞서는 메모리 액세스들의 수행 결과는 이 RELEASE 오퍼레이션을 뒤이어 같은
+ 변수에 대해 수행된 ACQUIRE 오퍼레이션을 뒤따르는 메모리 액세스에는 보여질
+ 것이 보장됩니다. 다르게 말하자면, 주어진 변수의 크리티컬 섹션에서는, 해당
+ 변수에 대한 앞의 크리티컬 섹션에서의 모든 액세스들이 완료되었을 것을
+ 보장합니다.
+
+ 즉, ACQUIRE 는 최소한의 "취득" 동작처럼, 그리고 RELEASE 는 최소한의 "공개"
+ 처럼 동작한다는 의미입니다.
+
+atomic_ops.txt 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히 순서잡힌 것들과
+(배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와 RELEASE 부류의
+것들도 존재합니다. 로드와 스토어를 모두 수행하는 조합된 어토믹 오퍼레이션에서,
+ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는 해당
+오퍼레이션의 스토어 부분에만 적용됩니다.
+
+메모리 배리어들은 두 CPU 간, 또는 CPU 와 디바이스 간에 상호작용의 가능성이 있을
+때에만 필요합니다. 만약 어떤 코드에 그런 상호작용이 없을 것이 보장된다면, 해당
+코드에서는 메모리 배리어를 사용할 필요가 없습니다.
+
+
+이것들은 _최소한의_ 보장사항들임을 알아두세요. 다른 아키텍쳐에서는 더 강력한
+보장사항을 제공할 수도 있습니다만, 그런 보장사항은 아키텍쳐 종속적 코드 이외의
+부분에서는 신뢰되지 _않을_ 겁니다.
+
+
+메모리 배리어에 대해 가정해선 안될 것
+-------------------------------------
+
+리눅스 커널 메모리 배리어들이 보장하지 않는 것들이 있습니다:
+
+ (*) 메모리 배리어 앞에서 명시된 어떤 메모리 액세스도 메모리 배리어 명령의 수행
+ 완료 시점까지 _완료_ 될 것이란 보장은 없습니다; 배리어가 하는 일은 CPU 의
+ 액세스 큐에 특정 타입의 액세스들은 넘을 수 없는 선을 긋는 것으로 생각될 수
+ 있습니다.
+
+ (*) 한 CPU 에서 메모리 배리어를 수행하는게 시스템의 다른 CPU 나 하드웨어에
+ 어떤 직접적인 영향을 끼친다는 보장은 존재하지 않습니다. 배리어 수행이
+ 만드는 간접적 영향은 두번째 CPU 가 첫번째 CPU 의 액세스들의 결과를
+ 바라보는 순서가 됩니다만, 다음 항목을 보세요:
+
+ (*) 첫번째 CPU 가 두번째 CPU 의 메모리 액세스들의 결과를 바라볼 때, _설령_
+ 두번째 CPU 가 메모리 배리어를 사용한다 해도, 첫번째 CPU _또한_ 그에 맞는
+ 메모리 배리어를 사용하지 않는다면 ("SMP 배리어 짝맞추기" 서브섹션을
+ 참고하세요) 그 결과가 올바른 순서로 보여진다는 보장은 없습니다.
+
+ (*) CPU 바깥의 하드웨어[*] 가 메모리 액세스들의 순서를 바꾸지 않는다는 보장은
+ 존재하지 않습니다. CPU 캐시 일관성 메커니즘은 메모리 배리어의 간접적
+ 영향을 CPU 사이에 전파하긴 하지만, 순서대로 전파하지는 않을 수 있습니다.
+
+ [*] 버스 마스터링 DMA 와 일관성에 대해서는 다음을 참고하시기 바랍니다:
+
+ Documentation/PCI/pci.txt
+ Documentation/DMA-API-HOWTO.txt
+ Documentation/DMA-API.txt
+
+
+데이터 의존성 배리어
+--------------------
+
+데이터 의존성 배리어의 사용에 있어 지켜야 하는 사항들은 약간 미묘하고, 데이터
+의존성 배리어가 사용되어야 하는 상황도 항상 명백하지는 않습니다. 설명을 위해
+다음의 이벤트 시퀀스를 생각해 봅시다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
+ B = 4;
+ <쓰기 배리어>
+ WRITE_ONCE(P, &B)
+ Q = READ_ONCE(P);
+ D = *Q;
+
+여기엔 분명한 데이터 의존성이 존재하므로, 이 시퀀스가 끝났을 때 Q 는 &A 또는 &B
+일 것이고, 따라서:
+
+ (Q == &A) 는 (D == 1) 를,
+ (Q == &B) 는 (D == 4) 를 의미합니다.
+
+하지만! CPU 2 는 B 의 업데이트를 인식하기 전에 P 의 업데이트를 인식할 수 있고,
+따라서 다음의 결과가 가능합니다:
+
+ (Q == &B) and (D == 2) ????
+
+이런 결과는 일관성이나 인과 관계 유지가 실패한 것처럼 보일 수도 있겠지만,
+그렇지 않습니다, 그리고 이 현상은 (DEC Alpha 와 같은) 여러 CPU 에서 실제로
+발견될 수 있습니다.
+
+이 문제 상황을 제대로 해결하기 위해, 데이터 의존성 배리어나 그보다 강화된
+무언가가 주소를 읽어올 때와 데이터를 읽어올 때 사이에 추가되어야만 합니다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
+ B = 4;
+ <쓰기 배리어>
+ WRITE_ONCE(P, &B);
+ Q = READ_ONCE(P);
+ <데이터 의존성 배리어>
+ D = *Q;
+
+이 변경은 앞의 처음 두가지 결과 중 하나만이 발생할 수 있고, 세번째의 결과는
+발생할 수 없도록 합니다.
+
+데이터 의존성 배리어는 의존적 쓰기에 대해서도 순서를 잡아줍니다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4;
+ <쓰기 배리어>
+ WRITE_ONCE(P, &B);
+ Q = READ_ONCE(P);
+ <데이터 의존성 배리어>
+ *Q = 5;
+
+이 데이터 의존성 배리어는 Q 로의 읽기가 *Q 로의 스토어와 순서를 맞추게
+해줍니다. 이는 다음과 같은 결과를 막습니다:
+
+ (Q == &B) && (B == 4)
+
+이런 패턴은 드물게 사용되어야 함을 알아 두시기 바랍니다. 무엇보다도, 의존성
+순서 규칙의 의도는 쓰기 작업을 -예방- 해서 그로 인해 발생하는 비싼 캐시 미스도
+없애려는 것입니다. 이 패턴은 드물게 발생하는 에러 조건 같은것들을 기록하는데
+사용될 수 있고, 이렇게 배리어를 사용해 순서를 지키게 함으로써 그런 기록이
+사라지는 것을 막습니다.
+
+
+[!] 상당히 비직관적인 이 상황은 분리된 캐시를 가진 기계, 예를 들어 한 캐시
+뱅크가 짝수번 캐시 라인을 처리하고 다른 뱅크는 홀수번 캐시 라인을 처리하는 기계
+등에서 가장 잘 발생합니다. 포인터 P 는 홀수 번호의 캐시 라인에 있고, 변수 B 는
+짝수 번호 캐시 라인에 있다고 생각해 봅시다. 그런 상태에서 읽기 작업을 하는 CPU
+의 짝수번 뱅크는 할 일이 쌓여 매우 바쁘지만 홀수번 뱅크는 할 일이 없어 아무
+일도 하지 않고 있었다면, 포인터 P 는 새 값 (&B) 을, 그리고 변수 B 는 옛날 값
+(2) 을 가지고 있는 상태가 보여질 수도 있습니다.
+
+
+데이터 의존성 배리어는 매우 중요한데, 예를 들어 RCU 시스템에서 그렇습니다.
+include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를
+참고하세요. 여기서 데이터 의존성 배리어는 RCU 로 관리되는 포인터의 타겟을 현재
+타겟에서 수정된 새로운 타겟으로 바꾸는 작업에서 새로 수정된 타겟이 초기화가
+완료되지 않은 채로 보여지는 일이 일어나지 않게 해줍니다.
+
+더 많은 예를 위해선 "캐시 일관성" 서브섹션을 참고하세요.
+
+
+컨트롤 의존성
+-------------
+
+로드-로드 컨트롤 의존성은 데이터 의존성 배리어만으로는 정확히 동작할 수가
+없어서 읽기 메모리 배리어를 필요로 합니다. 아래의 코드를 봅시다:
+
+ q = READ_ONCE(a);
+ if (q) {
+ <데이터 의존성 배리어> /* BUG: No data dependency!!! */
+ p = READ_ONCE(b);
+ }
+
+이 코드는 원하는 대로의 효과를 내지 못할 수 있는데, 이 코드에는 데이터 의존성이
+아니라 컨트롤 의존성이 존재하기 때문으로, 이런 상황에서 CPU 는 실행 속도를 더
+빠르게 하기 위해 분기 조건의 결과를 예측하고 코드를 재배치 할 수 있어서 다른
+CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레이션보다 먼저 발생한
+걸로 인식할 수 있습니다. 여기에 정말로 필요했던 건 다음과 같습니다:
+
+ q = READ_ONCE(a);
+ if (q) {
+ <읽기 배리어>
+ p = READ_ONCE(b);
+ }
+
+하지만, 스토어 오퍼레이션은 예측적으로 수행되지 않습니다. 즉, 다음 예에서와
+같이 로드-스토어 컨트롤 의존성이 존재하는 경우에는 순서가 -지켜진다-는
+의미입니다.
+
+ q = READ_ONCE(a);
+ if (q) {
+ WRITE_ONCE(b, p);
+ }
+
+컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다. 그렇다곤
+하나, READ_ONCE() 는 반드시 사용해야 함을 부디 명심하세요! READ_ONCE() 가
+없다면, 컴파일러가 'a' 로부터의 로드를 'a' 로부터의 또다른 로드와, 'b' 로의
+스토어를 'b' 로의 또다른 스토어와 조합해 버려 매우 비직관적인 결과를 초래할 수
+있습니다.
+
+이걸로 끝이 아닌게, 컴파일러가 변수 'a' 의 값이 항상 0이 아니라고 증명할 수
+있다면, 앞의 예에서 "if" 문을 없애서 다음과 같이 최적화 할 수도 있습니다:
+
+ q = a;
+ b = p; /* BUG: Compiler and CPU can both reorder!!! */
+
+그러니 READ_ONCE() 를 반드시 사용하세요.
+
+다음과 같이 "if" 문의 양갈래 브랜치에 모두 존재하는 동일한 스토어에 대해 순서를
+강제하고 싶은 경우가 있을 수 있습니다:
+
+ q = READ_ONCE(a);
+ if (q) {
+ barrier();
+ WRITE_ONCE(b, p);
+ do_something();
+ } else {
+ barrier();
+ WRITE_ONCE(b, p);
+ do_something_else();
+ }
+
+안타깝게도, 현재의 컴파일러들은 높은 최적화 레벨에서는 이걸 다음과 같이
+바꿔버립니다:
+
+ q = READ_ONCE(a);
+ barrier();
+ WRITE_ONCE(b, p); /* BUG: No ordering vs. load from a!!! */
+ if (q) {
+ /* WRITE_ONCE(b, p); -- moved up, BUG!!! */
+ do_something();
+ } else {
+ /* WRITE_ONCE(b, p); -- moved up, BUG!!! */
+ do_something_else();
+ }
+
+이제 'a' 에서의 로드와 'b' 로의 스토어 사이에는 조건적 관계가 없기 때문에 CPU
+는 이들의 순서를 바꿀 수 있게 됩니다: 이런 경우에 조건적 관계는 반드시
+필요한데, 모든 컴파일러 최적화가 이루어지고 난 후의 어셈블리 코드에서도
+마찬가지입니다. 따라서, 이 예에서 순서를 지키기 위해서는 smp_store_release()
+와 같은 명시적 메모리 배리어가 필요합니다:
+
+ q = READ_ONCE(a);
+ if (q) {
+ smp_store_release(&b, p);
+ do_something();
+ } else {
+ smp_store_release(&b, p);
+ do_something_else();
+ }
+
+반면에 명시적 메모리 배리어가 없다면, 이런 경우의 순서는 스토어 오퍼레이션들이
+서로 다를 때에만 보장되는데, 예를 들면 다음과 같은 경우입니다:
+
+ q = READ_ONCE(a);
+ if (q) {
+ WRITE_ONCE(b, p);
+ do_something();
+ } else {
+ WRITE_ONCE(b, r);
+ do_something_else();
+ }
+
+처음의 READ_ONCE() 는 컴파일러가 'a' 의 값을 증명해내는 것을 막기 위해 여전히
+필요합니다.
+
+또한, 로컬 변수 'q' 를 가지고 하는 일에 대해 주의해야 하는데, 그러지 않으면
+컴파일러는 그 값을 추측하고 또다시 필요한 조건관계를 없애버릴 수 있습니다.
+예를 들면:
+
+ q = READ_ONCE(a);
+ if (q % MAX) {
+ WRITE_ONCE(b, p);
+ do_something();
+ } else {
+ WRITE_ONCE(b, r);
+ do_something_else();
+ }
+
+만약 MAX 가 1 로 정의된 상수라면, 컴파일러는 (q % MAX) 는 0이란 것을 알아채고,
+위의 코드를 아래와 같이 바꿔버릴 수 있습니다:
+
+ q = READ_ONCE(a);
+ WRITE_ONCE(b, p);
+ do_something_else();
+
+이렇게 되면, CPU 는 변수 'a' 로부터의 로드와 변수 'b' 로의 스토어 사이의 순서를
+지켜줄 필요가 없어집니다. barrier() 를 추가해 해결해 보고 싶겠지만, 그건
+도움이 안됩니다. 조건 관계는 사라졌고, barrier() 는 이를 되돌리지 못합니다.
+따라서, 이 순서를 지켜야 한다면, MAX 가 1 보다 크다는 것을, 다음과 같은 방법을
+사용해 분명히 해야 합니다:
+
+ q = READ_ONCE(a);
+ BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
+ if (q % MAX) {
+ WRITE_ONCE(b, p);
+ do_something();
+ } else {
+ WRITE_ONCE(b, r);
+ do_something_else();
+ }
+
+'b' 로의 스토어들은 여전히 서로 다름을 알아두세요. 만약 그것들이 동일하면,
+앞에서 이야기했듯, 컴파일러가 그 스토어 오퍼레이션들을 'if' 문 바깥으로
+끄집어낼 수 있습니다.
+
+또한 이진 조건문 평가에 너무 의존하지 않도록 조심해야 합니다. 다음의 예를
+봅시다:
+
+ q = READ_ONCE(a);
+ if (q || 1 > 0)
+ WRITE_ONCE(b, 1);
+
+첫번째 조건만으로는 브랜치 조건 전체를 거짓으로 만들 수 없고 두번째 조건은 항상
+참이기 때문에, 컴파일러는 이 예를 다음과 같이 바꿔서 컨트롤 의존성을 없애버릴
+수 있습니다:
+
+ q = READ_ONCE(a);
+ WRITE_ONCE(b, 1);
+
+이 예는 컴파일러가 코드를 추측으로 수정할 수 없도록 분명히 해야 한다는 점을
+강조합니다. 조금 더 일반적으로 말해서, READ_ONCE() 는 컴파일러에게 주어진 로드
+오퍼레이션을 위한 코드를 정말로 만들도록 하지만, 컴파일러가 그렇게 만들어진
+코드의 수행 결과를 사용하도록 강제하지는 않습니다.
+
+마지막으로, 컨트롤 의존성은 이행성 (transitivity) 을 제공하지 -않습니다-. 이건
+x 와 y 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로
+보이겠습니다:
+
+ CPU 0 CPU 1
+ ======================= =======================
+ r1 = READ_ONCE(x); r2 = READ_ONCE(y);
+ if (r1 > 0) if (r2 > 0)
+ WRITE_ONCE(y, 1); WRITE_ONCE(x, 1);
+
+ assert(!(r1 == 1 && r2 == 1));
+
+이 두 CPU 예제에서 assert() 의 조건은 항상 참일 것입니다. 그리고, 만약 컨트롤
+의존성이 이행성을 (실제로는 그러지 않지만) 보장한다면, 다음의 CPU 가 추가되어도
+아래의 assert() 조건은 참이 될것입니다:
+
+ CPU 2
+ =====================
+ WRITE_ONCE(x, 2);
+
+ assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
+
+하지만 컨트롤 의존성은 이행성을 제공하지 -않기- 때문에, 세개의 CPU 예제가 실행
+완료된 후에 위의 assert() 의 조건은 거짓으로 평가될 수 있습니다. 세개의 CPU
+예제가 순서를 지키길 원한다면, CPU 0 와 CPU 1 코드의 로드와 스토어 사이, "if"
+문 바로 다음에 smp_mb()를 넣어야 합니다. 더 나아가서, 최초의 두 CPU 예제는
+매우 위험하므로 사용되지 않아야 합니다.
+
+이 두개의 예제는 다음 논문:
+http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf 와
+이 사이트: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html 에 나온 LB 와 WWC
+리트머스 테스트입니다.
+
+요약하자면:
+
+ (*) 컨트롤 의존성은 앞의 로드들을 뒤의 스토어들에 대해 순서를 맞춰줍니다.
+ 하지만, 그 외의 어떤 순서도 보장하지 -않습니다-: 앞의 로드와 뒤의 로드들
+ 사이에도, 앞의 스토어와 뒤의 스토어들 사이에도요. 이런 다른 형태의
+ 순서가 필요하다면 smp_rmb() 나 smp_wmb()를, 또는, 앞의 스토어들과 뒤의
+ 로드들 사이의 순서를 위해서는 smp_mb() 를 사용하세요.
+
+ (*) "if" 문의 양갈래 브랜치가 같은 변수에의 동일한 스토어로 시작한다면, 그
+ 스토어들은 각 스토어 앞에 smp_mb() 를 넣거나 smp_store_release() 를
+ 사용해서 스토어를 하는 식으로 순서를 맞춰줘야 합니다. 이 문제를 해결하기
+ 위해 "if" 문의 양갈래 브랜치의 시작 지점에 barrier() 를 넣는 것만으로는
+ 충분한 해결이 되지 않는데, 이는 앞의 예에서 본것과 같이, 컴파일러의
+ 최적화는 barrier() 가 의미하는 바를 지키면서도 컨트롤 의존성을 손상시킬
+ 수 있기 때문이라는 점을 부디 알아두시기 바랍니다.
+
+ (*) 컨트롤 의존성은 앞의 로드와 뒤의 스토어 사이에 최소 하나의, 실행
+ 시점에서의 조건관계를 필요로 하며, 이 조건관계는 앞의 로드와 관계되어야
+ 합니다. 만약 컴파일러가 조건 관계를 최적화로 없앨수 있다면, 순서도
+ 최적화로 없애버렸을 겁니다. READ_ONCE() 와 WRITE_ONCE() 의 주의 깊은
+ 사용은 주어진 조건 관계를 유지하는데 도움이 될 수 있습니다.
+
+ (*) 컨트롤 의존성을 위해선 컴파일러가 조건관계를 없애버리는 것을 막아야
+ 합니다. 주의 깊은 READ_ONCE() 나 atomic{,64}_read() 의 사용이 컨트롤
+ 의존성이 사라지지 않게 하는데 도움을 줄 수 있습니다. 더 많은 정보를
+ 위해선 "컴파일러 배리어" 섹션을 참고하시기 바랍니다.
+
+ (*) 컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다.
+
+ (*) 컨트롤 의존성은 이행성을 제공하지 -않습니다-. 이행성이 필요하다면,
+ smp_mb() 를 사용하세요.
+
+
+SMP 배리어 짝맞추기
+--------------------
+
+CPU 간 상호작용을 다룰 때에 일부 타입의 메모리 배리어는 항상 짝을 맞춰
+사용되어야 합니다. 적절하게 짝을 맞추지 않은 코드는 사실상 에러에 가깝습니다.
+
+범용 배리어들은 범용 배리어끼리도 짝을 맞추지만 이행성이 없는 대부분의 다른
+타입의 배리어들과도 짝을 맞춥니다. ACQUIRE 배리어는 RELEASE 배리어와 짝을
+맞춥니다만, 둘 다 범용 배리어를 포함해 다른 배리어들과도 짝을 맞출 수 있습니다.
+쓰기 배리어는 데이터 의존성 배리어나 컨트롤 의존성, ACQUIRE 배리어, RELEASE
+배리어, 읽기 배리어, 또는 범용 배리어와 짝을 맞춥니다. 비슷하게 읽기 배리어나
+컨트롤 의존성, 또는 데이터 의존성 배리어는 쓰기 배리어나 ACQUIRE 배리어,
+RELEASE 배리어, 또는 범용 배리어와 짝을 맞추는데, 다음과 같습니다:
+
+ CPU 1 CPU 2
+ =============== ===============
+ WRITE_ONCE(a, 1);
+ <쓰기 배리어>
+ WRITE_ONCE(b, 2); x = READ_ONCE(b);
+ <읽기 배리어>
+ y = READ_ONCE(a);
+
+또는:
+
+ CPU 1 CPU 2
+ =============== ===============================
+ a = 1;
+ <쓰기 배리어>
+ WRITE_ONCE(b, &a); x = READ_ONCE(b);
+ <데이터 의존성 배리어>
+ y = *x;
+
+또는:
+
+ CPU 1 CPU 2
+ =============== ===============================
+ r1 = READ_ONCE(y);
+ <범용 배리어>
+ WRITE_ONCE(y, 1); if (r2 = READ_ONCE(x)) {
+ <묵시적 컨트롤 의존성>
+ WRITE_ONCE(y, 1);
+ }
+
+ assert(r1 == 0 || r2 == 0);
+
+기본적으로, 여기서의 읽기 배리어는 "더 완화된" 타입일 순 있어도 항상 존재해야
+합니다.
+
+[!] 쓰기 배리어 앞의 스토어 오퍼레이션은 일반적으로 읽기 배리어나 데이터
+의존성 배리어 뒤의 로드 오퍼레이션과 매치될 것이고, 반대도 마찬가지입니다:
+
+ CPU 1 CPU 2
+ =================== ===================
+ WRITE_ONCE(a, 1); }---- --->{ v = READ_ONCE(c);
+ WRITE_ONCE(b, 2); } \ / { w = READ_ONCE(d);
+ <쓰기 배리어> \ <읽기 배리어>
+ WRITE_ONCE(c, 3); } / \ { x = READ_ONCE(a);
+ WRITE_ONCE(d, 4); }---- --->{ y = READ_ONCE(b);
+
+
+메모리 배리어 시퀀스의 예
+-------------------------
+
+첫째, 쓰기 배리어는 스토어 오퍼레이션들의 부분적 순서 세우기로 동작합니다.
+아래의 이벤트 시퀀스를 보세요:
+
+ CPU 1
+ =======================
+ STORE A = 1
+ STORE B = 2
+ STORE C = 3
+ <쓰기 배리어>
+ STORE D = 4
+ STORE E = 5
+
+이 이벤트 시퀀스는 메모리 일관성 시스템에 원소끼리의 순서가 존재하지 않는 집합
+{ STORE A, STORE B, STORE C } 가 역시 원소끼리의 순서가 존재하지 않는 집합
+{ STORE D, STORE E } 보다 먼저 일어난 것으로 시스템의 나머지 요소들에 보이도록
+전달됩니다:
+
+ +-------+ : :
+ | | +------+
+ | |------>| C=3 | } /\
+ | | : +------+ }----- \ -----> 시스템의 나머지 요소에
+ | | : | A=1 | } \/ 보여질 수 있는 이벤트들
+ | | : +------+ }
+ | CPU 1 | : | B=2 | }
+ | | +------+ }
+ | | wwwwwwwwwwwwwwww } <--- 여기서 쓰기 배리어는 배리어 앞의
+ | | +------+ } 모든 스토어가 배리어 뒤의 스토어
+ | | : | E=5 | } 전에 메모리 시스템에 전달되도록
+ | | : +------+ } 합니다
+ | |------>| D=4 | }
+ | | +------+
+ +-------+ : :
+ |
+ | CPU 1 에 의해 메모리 시스템에 전달되는
+ | 일련의 스토어 오퍼레이션들
+ V
+
+
+둘째, 데이터 의존성 배리어는 데이터 의존적 로드 오퍼레이션들의 부분적 순서
+세우기로 동작합니다. 다음 일련의 이벤트들을 보세요:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { B = 7; X = 9; Y = 8; C = &Y }
+ STORE A = 1
+ STORE B = 2
+ <쓰기 배리어>
+ STORE C = &B LOAD X
+ STORE D = 4 LOAD C (gets &B)
+ LOAD *C (reads B)
+
+여기에 별다른 개입이 없다면, CPU 1 의 쓰기 배리어에도 불구하고 CPU 2 는 CPU 1
+의 이벤트들을 완전히 무작위적 순서로 인지하게 됩니다:
+
+ +-------+ : : : :
+ | | +------+ +-------+ | CPU 2 에 인지되는
+ | |------>| B=2 |----- --->| Y->8 | | 업데이트 이벤트
+ | | : +------+ \ +-------+ | 시퀀스
+ | CPU 1 | : | A=1 | \ --->| C->&Y | V
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ 분명히 잘못된 ---> | | B->7 |------>| |
+ B 의 값 인지 (!) | +-------+ | |
+ | : : | |
+ | +-------+ | |
+ X 의 로드가 B 의 ---> \ | X->9 |------>| |
+ 일관성 유지를 \ +-------+ | |
+ 지연시킴 ----->| B->2 | +-------+
+ +-------+
+ : :
+
+
+앞의 예에서, CPU 2 는 (B 의 값이 될) *C 의 값 읽기가 C 의 LOAD 뒤에 이어짐에도
+B 가 7 이라는 결과를 얻습니다.
+
+하지만, 만약 데이터 의존성 배리어가 C 의 로드와 *C (즉, B) 의 로드 사이에
+있었다면:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { B = 7; X = 9; Y = 8; C = &Y }
+ STORE A = 1
+ STORE B = 2
+ <쓰기 배리어>
+ STORE C = &B LOAD X
+ STORE D = 4 LOAD C (gets &B)
+ <데이터 의존성 배리어>
+ LOAD *C (reads B)
+
+다음과 같이 됩니다:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| B=2 |----- --->| Y->8 |
+ | | : +------+ \ +-------+
+ | CPU 1 | : | A=1 | \ --->| C->&Y |
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ | | X->9 |------>| |
+ | +-------+ | |
+ C 로의 스토어 앞의 ---> \ ddddddddddddddddd | |
+ 모든 이벤트 결과가 \ +-------+ | |
+ 뒤의 로드에게 ----->| B->2 |------>| |
+ 보이게 강제한다 +-------+ | |
+ : : +-------+
+
+
+셋째, 읽기 배리어는 로드 오퍼레이션들에의 부분적 순서 세우기로 동작합니다.
+아래의 일련의 이벤트를 봅시다:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <쓰기 배리어>
+ STORE B=2
+ LOAD B
+ LOAD A
+
+CPU 1 은 쓰기 배리어를 쳤지만, 별다른 개입이 없다면 CPU 2 는 CPU 1 에서 행해진
+이벤트의 결과를 무작위적 순서로 인지하게 됩니다.
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | | A->0 |------>| |
+ | +-------+ | |
+ | : : +-------+
+ \ : :
+ \ +-------+
+ ---->| A->1 |
+ +-------+
+ : :
+
+
+하지만, 만약 읽기 배리어가 B 의 로드와 A 의 로드 사이에 존재한다면:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <쓰기 배리어>
+ STORE B=2
+ LOAD B
+ <읽기 배리어>
+ LOAD A
+
+CPU 1 에 의해 만들어진 부분적 순서가 CPU 2 에도 그대로 인지됩니다:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ 여기서 읽기 배리어는 ----> \ rrrrrrrrrrrrrrrrr | |
+ B 로의 스토어 전의 \ +-------+ | |
+ 모든 결과를 CPU 2 에 ---->| A->1 |------>| |
+ 보이도록 한다 +-------+ | |
+ : : +-------+
+
+
+더 완벽한 설명을 위해, A 의 로드가 읽기 배리어 앞과 뒤에 있으면 어떻게 될지
+생각해 봅시다:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <쓰기 배리어>
+ STORE B=2
+ LOAD B
+ LOAD A [first load of A]
+ <읽기 배리어>
+ LOAD A [second load of A]
+
+A 의 로드 두개가 모두 B 의 로드 뒤에 있지만, 서로 다른 값을 얻어올 수
+있습니다:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ | +-------+ | |
+ | | A->0 |------>| 1st |
+ | +-------+ | |
+ 여기서 읽기 배리어는 ----> \ rrrrrrrrrrrrrrrrr | |
+ B 로의 스토어 전의 \ +-------+ | |
+ 모든 결과를 CPU 2 에 ---->| A->1 |------>| 2nd |
+ 보이도록 한다 +-------+ | |
+ : : +-------+
+
+
+하지만 CPU 1 에서의 A 업데이트는 읽기 배리어가 완료되기 전에도 보일 수도
+있긴 합니다:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ \ : : | |
+ \ +-------+ | |
+ ---->| A->1 |------>| 1st |
+ +-------+ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ | A->1 |------>| 2nd |
+ +-------+ | |
+ : : +-------+
+
+
+여기서 보장되는 건, 만약 B 의 로드가 B == 2 라는 결과를 봤다면, A 에의 두번째
+로드는 항상 A == 1 을 보게 될 것이라는 겁니다. A 에의 첫번째 로드에는 그런
+보장이 없습니다; A == 0 이거나 A == 1 이거나 둘 중 하나의 결과를 보게 될겁니다.
+
+
+읽기 메모리 배리어 VS 로드 예측
+-------------------------------
+
+많은 CPU들이 로드를 예측적으로 (speculatively) 합니다: 어떤 데이터를 메모리에서
+로드해야 하게 될지 예측을 했다면, 해당 데이터를 로드하는 인스트럭션을 실제로는
+아직 만나지 않았더라도 다른 로드 작업이 없어 버스 (bus) 가 아무 일도 하고 있지
+않다면, 그 데이터를 로드합니다. 이후에 실제 로드 인스트럭션이 실행되면 CPU 가
+이미 그 값을 가지고 있기 때문에 그 로드 인스트럭션은 즉시 완료됩니다.
+
+해당 CPU 는 실제로는 그 값이 필요치 않았다는 사실이 나중에 드러날 수도 있는데 -
+해당 로드 인스트럭션이 브랜치로 우회되거나 했을 수 있겠죠 - , 그렇게 되면 앞서
+읽어둔 값을 버리거나 나중의 사용을 위해 캐시에 넣어둘 수 있습니다.
+
+다음을 생각해 봅시다:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE } 나누기 명령은 일반적으로
+ DIVIDE } 긴 시간을 필요로 합니다
+ LOAD A
+
+는 이렇게 될 수 있습니다:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ 나누기 하느라 바쁜 ---> --->| A->0 |~~~~ | |
+ CPU 는 A 의 LOAD 를 +-------+ ~ | |
+ 예측해서 수행한다 : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ 나누기가 끝나면 ---> ---> : : ~-->| |
+ CPU 는 해당 LOAD 를 : : | |
+ 즉각 완료한다 : : +-------+
+
+
+읽기 배리어나 데이터 의존성 배리어를 두번째 로드 직전에 놓는다면:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE
+ DIVIDE
+ <읽기 배리어>
+ LOAD A
+
+예측으로 얻어진 값은 사용된 배리어의 타입에 따라서 해당 값이 옳은지 검토되게
+됩니다. 만약 해당 메모리 영역에 변화가 없었다면, 예측으로 얻어두었던 값이
+사용됩니다:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ 나누기 하느라 바쁜 ---> --->| A->0 |~~~~ | |
+ CPU 는 A 의 LOAD 를 +-------+ ~ | |
+ 예측한다 : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrr~ | |
+ : : ~ | |
+ : : ~-->| |
+ : : | |
+ : : +-------+
+
+
+하지만 다른 CPU 에서 업데이트나 무효화가 있었다면, 그 예측은 무효화되고 그 값은
+다시 읽혀집니다:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ 나누기 하느라 바쁜 ---> --->| A->0 |~~~~ | |
+ CPU 는 A 의 LOAD 를 +-------+ ~ | |
+ 예측한다 : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ 예측성 동작은 무효화 되고 ---> --->| A->1 |------>| |
+ 업데이트된 값이 다시 읽혀진다 +-------+ | |
+ : : +-------+
+
+
+이행성
+------
+
+이행성(transitivity)은 실제의 컴퓨터 시스템에서 항상 제공되지는 않는, 순서
+맞추기에 대한 상당히 직관적인 개념입니다. 다음의 예가 이행성을 보여줍니다:
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <범용 배리어> <범용 배리어>
+ LOAD Y LOAD X
+
+CPU 2 의 X 로드가 1을 리턴했고 Y 로드가 0을 리턴했다고 해봅시다. 이는 CPU 2 의
+X 로드가 CPU 1 의 X 스토어 뒤에 이루어졌고 CPU 2 의 Y 로드는 CPU 3 의 Y 스토어
+전에 이루어졌음을 의미합니다. 그럼 "CPU 3 의 X 로드는 0을 리턴할 수 있나요?"
+
+CPU 2 의 X 로드는 CPU 1 의 스토어 후에 이루어졌으니, CPU 3 의 X 로드는 1을
+리턴하는게 자연스럽습니다. 이런 생각이 이행성의 한 예입니다: CPU A 에서 실행된
+로드가 CPU B 에서의 같은 변수에 대한 로드를 뒤따른다면, CPU A 의 로드는 CPU B
+의 로드가 내놓은 값과 같거나 그 후의 값을 내놓아야 합니다.
+
+리눅스 커널에서 범용 배리어의 사용은 이행성을 보장합니다. 따라서, 앞의 예에서
+CPU 2 의 X 로드가 1을, Y 로드는 0을 리턴했다면, CPU 3 의 X 로드는 반드시 1을
+리턴합니다.
+
+하지만, 읽기나 쓰기 배리어에 대해서는 이행성이 보장되지 -않습니다-. 예를 들어,
+앞의 예에서 CPU 2 의 범용 배리어가 아래처럼 읽기 배리어로 바뀐 경우를 생각해
+봅시다:
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <읽기 배리어> <범용 배리어>
+ LOAD Y LOAD X
+
+이 코드는 이행성을 갖지 않습니다: 이 예에서는, CPU 2 의 X 로드가 1을
+리턴하고, Y 로드는 0을 리턴하지만 CPU 3 의 X 로드가 0을 리턴하는 것도 완전히
+합법적입니다.
+
+CPU 2 의 읽기 배리어가 자신의 읽기는 순서를 맞춰줘도, CPU 1 의 스토어와의
+순서를 맞춰준다고는 보장할 수 없다는게 핵심입니다. 따라서, CPU 1 과 CPU 2 가
+버퍼나 캐시를 공유하는 시스템에서 이 예제 코드가 실행된다면, CPU 2 는 CPU 1 이
+쓴 값에 좀 빨리 접근할 수 있을 것입니다. 따라서 CPU 1 과 CPU 2 의 접근으로
+조합된 순서를 모든 CPU 가 동의할 수 있도록 하기 위해 범용 배리어가 필요합니다.
+
+범용 배리어는 "글로벌 이행성"을 제공해서, 모든 CPU 들이 오퍼레이션들의 순서에
+동의하게 할 것입니다. 반면, release-acquire 조합은 "로컬 이행성" 만을
+제공해서, 해당 조합이 사용된 CPU 들만이 해당 액세스들의 조합된 순서에 동의함이
+보장됩니다. 예를 들어, 존경스런 Herman Hollerith 의 C 코드로 보면:
+
+ int u, v, x, y, z;
+
+ void cpu0(void)
+ {
+ r0 = smp_load_acquire(&x);
+ WRITE_ONCE(u, 1);
+ smp_store_release(&y, 1);
+ }
+
+ void cpu1(void)
+ {
+ r1 = smp_load_acquire(&y);
+ r4 = READ_ONCE(v);
+ r5 = READ_ONCE(u);
+ smp_store_release(&z, 1);
+ }
+
+ void cpu2(void)
+ {
+ r2 = smp_load_acquire(&z);
+ smp_store_release(&x, 1);
+ }
+
+ void cpu3(void)
+ {
+ WRITE_ONCE(v, 1);
+ smp_mb();
+ r3 = READ_ONCE(u);
+ }
+
+cpu0(), cpu1(), 그리고 cpu2() 는 smp_store_release()/smp_load_acquire() 쌍의
+연결을 통한 로컬 이행성에 동참하고 있으므로, 다음과 같은 결과는 나오지 않을
+겁니다:
+
+ r0 == 1 && r1 == 1 && r2 == 1
+
+더 나아가서, cpu0() 와 cpu1() 사이의 release-acquire 관계로 인해, cpu1() 은
+cpu0() 의 쓰기를 봐야만 하므로, 다음과 같은 결과도 없을 겁니다:
+
+ r1 == 1 && r5 == 0
+
+하지만, release-acquire 타동성은 동참한 CPU 들에만 적용되므로 cpu3() 에는
+적용되지 않습니다. 따라서, 다음과 같은 결과가 가능합니다:
+
+ r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0
+
+비슷하게, 다음과 같은 결과도 가능합니다:
+
+ r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0 && r5 == 1
+
+cpu0(), cpu1(), 그리고 cpu2() 는 그들의 읽기와 쓰기를 순서대로 보게 되지만,
+release-acquire 체인에 관여되지 않은 CPU 들은 그 순서에 이견을 가질 수
+있습니다. 이런 이견은 smp_load_acquire() 와 smp_store_release() 의 구현에
+사용되는 완화된 메모리 배리어 인스트럭션들은 항상 배리어 앞의 스토어들을 뒤의
+로드들에 앞세울 필요는 없다는 사실에서 기인합니다. 이 말은 cpu3() 는 cpu0() 의
+u 로의 스토어를 cpu1() 의 v 로부터의 로드 뒤에 일어난 것으로 볼 수 있다는
+뜻입니다, cpu0() 와 cpu1() 은 이 두 오퍼레이션이 의도된 순서대로 일어났음에
+모두 동의하는데도 말입니다.
+
+하지만, smp_load_acquire() 는 마술이 아님을 명심하시기 바랍니다. 구체적으로,
+이 함수는 단순히 순서 규칙을 지키며 인자로부터의 읽기를 수행합니다. 이것은
+어떤 특정한 값이 읽힐 것인지는 보장하지 -않습니다-. 따라서, 다음과 같은 결과도
+가능합니다:
+
+ r0 == 0 && r1 == 0 && r2 == 0 && r5 == 0
+
+이런 결과는 어떤 것도 재배치 되지 않는, 순차적 일관성을 가진 가상의
+시스템에서도 일어날 수 있음을 기억해 두시기 바랍니다.
+
+다시 말하지만, 당신의 코드가 글로벌 이행성을 필요로 한다면, 범용 배리어를
+사용하십시오.
+
+
+==================
+명시적 커널 배리어
+==================
+
+리눅스 커널은 서로 다른 단계에서 동작하는 다양한 배리어들을 가지고 있습니다:
+
+ (*) 컴파일러 배리어.
+
+ (*) CPU 메모리 배리어.
+
+ (*) MMIO 쓰기 배리어.
+
+
+컴파일러 배리어
+---------------
+
+리눅스 커널은 컴파일러가 메모리 액세스를 재배치 하는 것을 막아주는 명시적인
+컴파일러 배리어를 가지고 있습니다:
+
+ barrier();
+
+이건 범용 배리어입니다 -- barrier() 의 읽기-읽기 나 쓰기-쓰기 변종은 없습니다.
+하지만, READ_ONCE() 와 WRITE_ONCE() 는 특정 액세스들에 대해서만 동작하는
+barrier() 의 완화된 형태로 볼 수 있습니다.
+
+barrier() 함수는 다음과 같은 효과를 갖습니다:
+
+ (*) 컴파일러가 barrier() 뒤의 액세스들이 barrier() 앞의 액세스보다 앞으로
+ 재배치되지 못하게 합니다. 예를 들어, 인터럽트 핸들러 코드와 인터럽트 당한
+ 코드 사이의 통신을 신중히 하기 위해 사용될 수 있습니다.
+
+ (*) 루프에서, 컴파일러가 루프 조건에 사용된 변수를 매 이터레이션마다
+ 메모리에서 로드하지 않아도 되도록 최적화 하는걸 방지합니다.
+
+READ_ONCE() 와 WRITE_ONCE() 함수는 싱글 쓰레드 코드에서는 문제 없지만 동시성이
+있는 코드에서는 문제가 될 수 있는 모든 최적화를 막습니다. 이런 류의 최적화에
+대한 예를 몇가지 들어보면 다음과 같습니다:
+
+ (*) 컴파일러는 같은 변수에 대한 로드와 스토어를 재배치 할 수 있고, 어떤
+ 경우에는 CPU가 같은 변수로부터의 로드들을 재배치할 수도 있습니다. 이는
+ 다음의 코드가:
+
+ a[0] = x;
+ a[1] = x;
+
+ x 의 예전 값이 a[1] 에, 새 값이 a[0] 에 있게 할 수 있다는 뜻입니다.
+ 컴파일러와 CPU가 이런 일을 못하게 하려면 다음과 같이 해야 합니다:
+
+ a[0] = READ_ONCE(x);
+ a[1] = READ_ONCE(x);
+
+ 즉, READ_ONCE() 와 WRITE_ONCE() 는 여러 CPU 에서 하나의 변수에 가해지는
+ 액세스들에 캐시 일관성을 제공합니다.
+
+ (*) 컴파일러는 같은 변수에 대한 연속적인 로드들을 병합할 수 있습니다. 그런
+ 병합 작업으로 컴파일러는 다음의 코드를:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ 다음과 같이, 싱글 쓰레드 코드에서는 말이 되지만 개발자의 의도와 전혀 맞지
+ 않는 방향으로 "최적화" 할 수 있습니다:
+
+ if (tmp = a)
+ for (;;)
+ do_something_with(tmp);
+
+ 컴파일러가 이런 짓을 하지 못하게 하려면 READ_ONCE() 를 사용하세요:
+
+ while (tmp = READ_ONCE(a))
+ do_something_with(tmp);
+
+ (*) 예컨대 레지스터 사용량이 많아 컴파일러가 모든 데이터를 레지스터에 담을 수
+ 없는 경우, 컴파일러는 변수를 다시 로드할 수 있습니다. 따라서 컴파일러는
+ 앞의 예에서 변수 'tmp' 사용을 최적화로 없애버릴 수 있습니다:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ 이 코드는 다음과 같이 싱글 쓰레드에서는 완벽하지만 동시성이 존재하는
+ 경우엔 치명적인 코드로 바뀔 수 있습니다:
+
+ while (a)
+ do_something_with(a);
+
+ 예를 들어, 최적화된 이 코드는 변수 a 가 다른 CPU 에 의해 "while" 문과
+ do_something_with() 호출 사이에 바뀌어 do_something_with() 에 0을 넘길
+ 수도 있습니다.
+
+ 이번에도, 컴파일러가 그런 짓을 하는걸 막기 위해 READ_ONCE() 를 사용하세요:
+
+ while (tmp = READ_ONCE(a))
+ do_something_with(tmp);
+
+ 레지스터가 부족한 상황을 겪는 경우, 컴파일러는 tmp 를 스택에 저장해둘 수도
+ 있습니다. 컴파일러가 변수를 다시 읽어들이는건 이렇게 저장해두고 후에 다시
+ 읽어들이는데 드는 오버헤드 때문입니다. 그렇게 하는게 싱글 쓰레드
+ 코드에서는 안전하므로, 안전하지 않은 경우에는 컴파일러에게 직접 알려줘야
+ 합니다.
+
+ (*) 컴파일러는 그 값이 무엇일지 알고 있다면 로드를 아예 안할 수도 있습니다.
+ 예를 들어, 다음의 코드는 변수 'a' 의 값이 항상 0임을 증명할 수 있다면:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ 이렇게 최적화 되어버릴 수 있습니다:
+
+ do { } while (0);
+
+ 이 변환은 싱글 쓰레드 코드에서는 도움이 되는데 로드와 브랜치를 제거했기
+ 때문입니다. 문제는 컴파일러가 'a' 의 값을 업데이트 하는건 현재의 CPU 하나
+ 뿐이라는 가정 위에서 증명을 했다는데 있습니다. 만약 변수 'a' 가 공유되어
+ 있다면, 컴파일러의 증명은 틀린 것이 될겁니다. 컴파일러는 그 자신이
+ 생각하는 것만큼 많은 것을 알고 있지 못함을 컴파일러에게 알리기 위해
+ READ_ONCE() 를 사용하세요:
+
+ while (tmp = READ_ONCE(a))
+ do_something_with(tmp);
+
+ 하지만 컴파일러는 READ_ONCE() 뒤에 나오는 값에 대해서도 눈길을 두고 있음을
+ 기억하세요. 예를 들어, 다음의 코드에서 MAX 는 전처리기 매크로로, 1의 값을
+ 갖는다고 해봅시다:
+
+ while ((tmp = READ_ONCE(a)) % MAX)
+ do_something_with(tmp);
+
+ 이렇게 되면 컴파일러는 MAX 를 가지고 수행되는 "%" 오퍼레이터의 결과가 항상
+ 0이라는 것을 알게 되고, 컴파일러가 코드를 실질적으로는 존재하지 않는
+ 것처럼 최적화 하는 것이 허용되어 버립니다. ('a' 변수의 로드는 여전히
+ 행해질 겁니다.)
+
+ (*) 비슷하게, 컴파일러는 변수가 저장하려 하는 값을 이미 가지고 있다는 것을
+ 알면 스토어 자체를 제거할 수 있습니다. 이번에도, 컴파일러는 현재의 CPU
+ 만이 그 변수에 값을 쓰는 오로지 하나의 존재라고 생각하여 공유된 변수에
+ 대해서는 잘못된 일을 하게 됩니다. 예를 들어, 다음과 같은 경우가 있을 수
+ 있습니다:
+
+ a = 0;
+ ... 변수 a 에 스토어를 하지 않는 코드 ...
+ a = 0;
+
+ 컴파일러는 변수 'a' 의 값은 이미 0이라는 것을 알고, 따라서 두번째 스토어를
+ 삭제할 겁니다. 만약 다른 CPU 가 그 사이 변수 'a' 에 다른 값을 썼다면
+ 황당한 결과가 나올 겁니다.
+
+ 컴파일러가 그런 잘못된 추측을 하지 않도록 WRITE_ONCE() 를 사용하세요:
+
+ WRITE_ONCE(a, 0);
+ ... 변수 a 에 스토어를 하지 않는 코드 ...
+ WRITE_ONCE(a, 0);
+
+ (*) 컴파일러는 하지 말라고 하지 않으면 메모리 액세스들을 재배치 할 수
+ 있습니다. 예를 들어, 다음의 프로세스 레벨 코드와 인터럽트 핸들러 사이의
+ 상호작용을 생각해 봅시다:
+
+ void process_level(void)
+ {
+ msg = get_message();
+ flag = true;
+ }
+
+ void interrupt_handler(void)
+ {
+ if (flag)
+ process_message(msg);
+ }
+
+ 이 코드에는 컴파일러가 process_level() 을 다음과 같이 변환하는 것을 막을
+ 수단이 없고, 이런 변환은 싱글쓰레드에서라면 실제로 훌륭한 선택일 수
+ 있습니다:
+
+ void process_level(void)
+ {
+ flag = true;
+ msg = get_message();
+ }
+
+ 이 두개의 문장 사이에 인터럽트가 발생한다면, interrupt_handler() 는 의미를
+ 알 수 없는 메세지를 받을 수도 있습니다. 이걸 막기 위해 다음과 같이
+ WRITE_ONCE() 를 사용하세요:
+
+ void process_level(void)
+ {
+ WRITE_ONCE(msg, get_message());
+ WRITE_ONCE(flag, true);
+ }
+
+ void interrupt_handler(void)
+ {
+ if (READ_ONCE(flag))
+ process_message(READ_ONCE(msg));
+ }
+
+ interrupt_handler() 안에서도 중첩된 인터럽트나 NMI 와 같이 인터럽트 핸들러
+ 역시 'flag' 와 'msg' 에 접근하는 또다른 무언가에 인터럽트 될 수 있다면
+ READ_ONCE() 와 WRITE_ONCE() 를 사용해야 함을 기억해 두세요. 만약 그런
+ 가능성이 없다면, interrupt_handler() 안에서는 문서화 목적이 아니라면
+ READ_ONCE() 와 WRITE_ONCE() 는 필요치 않습니다. (근래의 리눅스 커널에서
+ 중첩된 인터럽트는 보통 잘 일어나지 않음도 기억해 두세요, 실제로, 어떤
+ 인터럽트 핸들러가 인터럽트가 활성화된 채로 리턴하면 WARN_ONCE() 가
+ 실행됩니다.)
+
+ 컴파일러는 READ_ONCE() 와 WRITE_ONCE() 뒤의 READ_ONCE() 나 WRITE_ONCE(),
+ barrier(), 또는 비슷한 것들을 담고 있지 않은 코드를 움직일 수 있을 것으로
+ 가정되어야 합니다.
+
+ 이 효과는 barrier() 를 통해서도 만들 수 있지만, READ_ONCE() 와
+ WRITE_ONCE() 가 좀 더 안목 높은 선택입니다: READ_ONCE() 와 WRITE_ONCE()는
+ 컴파일러에 주어진 메모리 영역에 대해서만 최적화 가능성을 포기하도록
+ 하지만, barrier() 는 컴파일러가 지금까지 기계의 레지스터에 캐시해 놓은
+ 모든 메모리 영역의 값을 버려야 하게 하기 때문입니다. 물론, 컴파일러는
+ READ_ONCE() 와 WRITE_ONCE() 가 일어난 순서도 지켜줍니다, CPU 는 당연히
+ 그 순서를 지킬 의무가 없지만요.
+
+ (*) 컴파일러는 다음의 예에서와 같이 변수에의 스토어를 날조해낼 수도 있습니다:
+
+ if (a)
+ b = a;
+ else
+ b = 42;
+
+ 컴파일러는 아래와 같은 최적화로 브랜치를 줄일 겁니다:
+
+ b = 42;
+ if (a)
+ b = a;
+
+ 싱글 쓰레드 코드에서 이 최적화는 안전할 뿐 아니라 브랜치 갯수를
+ 줄여줍니다. 하지만 안타깝게도, 동시성이 있는 코드에서는 이 최적화는 다른
+ CPU 가 'b' 를 로드할 때, -- 'a' 가 0이 아닌데도 -- 가짜인 값, 42를 보게
+ 되는 경우를 가능하게 합니다. 이걸 방지하기 위해 WRITE_ONCE() 를
+ 사용하세요:
+
+ if (a)
+ WRITE_ONCE(b, a);
+ else
+ WRITE_ONCE(b, 42);
+
+ 컴파일러는 로드를 만들어낼 수도 있습니다. 일반적으로는 문제를 일으키지
+ 않지만, 캐시 라인 바운싱을 일으켜 성능과 확장성을 떨어뜨릴 수 있습니다.
+ 날조된 로드를 막기 위해선 READ_ONCE() 를 사용하세요.
+
+ (*) 정렬된 메모리 주소에 위치한, 한번의 메모리 참조 인스트럭션으로 액세스
+ 가능한 크기의 데이터는 하나의 큰 액세스가 여러개의 작은 액세스들로
+ 대체되는 "로드 티어링(load tearing)" 과 "스토어 티어링(store tearing)" 을
+ 방지합니다. 예를 들어, 주어진 아키텍쳐가 7-bit imeediate field 를 갖는
+ 16-bit 스토어 인스트럭션을 제공한다면, 컴파일러는 다음의 32-bit 스토어를
+ 구현하는데에 두개의 16-bit store-immediate 명령을 사용하려 할겁니다:
+
+ p = 0x00010002;
+
+ 스토어 할 상수를 만들고 그 값을 스토어 하기 위해 두개가 넘는 인스트럭션을
+ 사용하게 되는, 이런 종류의 최적화를 GCC 는 실제로 함을 부디 알아 두십시오.
+ 이 최적화는 싱글 쓰레드 코드에서는 성공적인 최적화 입니다. 실제로, 근래에
+ 발생한 (그리고 고쳐진) 버그는 GCC 가 volatile 스토어에 비정상적으로 이
+ 최적화를 사용하게 했습니다. 그런 버그가 없다면, 다음의 예에서
+ WRITE_ONCE() 의 사용은 스토어 티어링을 방지합니다:
+
+ WRITE_ONCE(p, 0x00010002);
+
+ Packed 구조체의 사용 역시 다음의 예처럼 로드 / 스토어 티어링을 유발할 수
+ 있습니다:
+
+ struct __attribute__((__packed__)) foo {
+ short a;
+ int b;
+ short c;
+ };
+ struct foo foo1, foo2;
+ ...
+
+ foo2.a = foo1.a;
+ foo2.b = foo1.b;
+ foo2.c = foo1.c;
+
+ READ_ONCE() 나 WRITE_ONCE() 도 없고 volatile 마킹도 없기 때문에,
+ 컴파일러는 이 세개의 대입문을 두개의 32-bit 로드와 두개의 32-bit 스토어로
+ 변환할 수 있습니다. 이는 'foo1.b' 의 값의 로드 티어링과 'foo2.b' 의
+ 스토어 티어링을 초래할 겁니다. 이 예에서도 READ_ONCE() 와 WRITE_ONCE()
+ 가 티어링을 막을 수 있습니다:
+
+ foo2.a = foo1.a;
+ WRITE_ONCE(foo2.b, READ_ONCE(foo1.b));
+ foo2.c = foo1.c;
+
+그렇지만, volatile 로 마크된 변수에 대해서는 READ_ONCE() 와 WRITE_ONCE() 가
+필요치 않습니다. 예를 들어, 'jiffies' 는 volatile 로 마크되어 있기 때문에,
+READ_ONCE(jiffies) 라고 할 필요가 없습니다. READ_ONCE() 와 WRITE_ONCE() 가
+실은 volatile 캐스팅으로 구현되어 있어서 인자가 이미 volatile 로 마크되어
+있다면 또다른 효과를 내지는 않기 때문입니다.
+
+이 컴파일러 배리어들은 CPU 에는 직접적 효과를 전혀 만들지 않기 때문에, 결국은
+재배치가 일어날 수도 있음을 부디 기억해 두십시오.
+
+
+CPU 메모리 배리어
+-----------------
+
+리눅스 커널은 다음의 여덟개 기본 CPU 메모리 배리어를 가지고 있습니다:
+
+ TYPE MANDATORY SMP CONDITIONAL
+ =============== ======================= ===========================
+ 범용 mb() smp_mb()
+ 쓰기 wmb() smp_wmb()
+ 읽기 rmb() smp_rmb()
+ 데이터 의존성 read_barrier_depends() smp_read_barrier_depends()
+
+
+데이터 의존성 배리어를 제외한 모든 메모리 배리어는 컴파일러 배리어를
+포함합니다. 데이터 의존성은 컴파일러에의 추가적인 순서 보장을 포함하지
+않습니다.
+
+방백: 데이터 의존성이 있는 경우, 컴파일러는 해당 로드를 올바른 순서로 일으킬
+것으로 (예: `a[b]` 는 a[b] 를 로드 하기 전에 b 의 값을 먼저 로드한다)
+기대되지만, C 언어 사양에는 컴파일러가 b 의 값을 추측 (예: 1 과 같음) 해서
+b 로드 전에 a 로드를 하는 코드 (예: tmp = a[1]; if (b != 1) tmp = a[b]; ) 를
+만들지 않아야 한다는 내용 같은 건 없습니다. 또한 컴파일러는 a[b] 를 로드한
+후에 b 를 또다시 로드할 수도 있어서, a[b] 보다 최신 버전의 b 값을 가질 수도
+있습니다. 이런 문제들의 해결책에 대한 의견 일치는 아직 없습니다만, 일단
+READ_ONCE() 매크로부터 보기 시작하는게 좋은 시작이 될겁니다.
+
+SMP 메모리 배리어들은 유니프로세서로 컴파일된 시스템에서는 컴파일러 배리어로
+바뀌는데, 하나의 CPU 는 스스로 일관성을 유지하고, 겹치는 액세스들 역시 올바른
+순서로 행해질 것으로 생각되기 때문입니다. 하지만, 아래의 "Virtual Machine
+Guests" 서브섹션을 참고하십시오.
+
+[!] SMP 시스템에서 공유메모리로의 접근들을 순서 세워야 할 때, SMP 메모리
+배리어는 _반드시_ 사용되어야 함을 기억하세요, 그대신 락을 사용하는 것으로도
+충분하긴 하지만 말이죠.
+
+Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효과만 통제하기에는
+불필요한 오버헤드를 갖기 때문에 SMP 효과만 통제하면 되는 곳에는 사용되지 않아야
+합니다. 하지만, 느슨한 순서 규칙의 메모리 I/O 윈도우를 통한 MMIO 의 효과를
+통제할 때에는 mandatory 배리어들이 사용될 수 있습니다. 이 배리어들은
+컴파일러와 CPU 모두 재배치를 못하도록 함으로써 메모리 오퍼레이션들이 디바이스에
+보여지는 순서에도 영향을 주기 때문에, SMP 가 아닌 시스템이라 할지라도 필요할 수
+있습니다.
+
+
+일부 고급 배리어 함수들도 있습니다:
+
+ (*) smp_store_mb(var, value)
+
+ 이 함수는 특정 변수에 특정 값을 대입하고 범용 메모리 배리어를 칩니다.
+ UP 컴파일에서는 컴파일러 배리어보다 더한 것을 친다고는 보장되지 않습니다.
+
+
+ (*) smp_mb__before_atomic();
+ (*) smp_mb__after_atomic();
+
+ 이것들은 값을 리턴하지 않는 (더하기, 빼기, 증가, 감소와 같은) 어토믹
+ 함수들을 위한, 특히 그것들이 레퍼런스 카운팅에 사용될 때를 위한
+ 함수들입니다. 이 함수들은 메모리 배리어를 내포하고 있지는 않습니다.
+
+ 이것들은 값을 리턴하지 않으며 어토믹한 (set_bit 과 clear_bit 같은) 비트
+ 연산에도 사용될 수 있습니다.
+
+ 한 예로, 객체 하나를 무효한 것으로 표시하고 그 객체의 레퍼런스 카운트를
+ 감소시키는 다음 코드를 보세요:
+
+ obj->dead = 1;
+ smp_mb__before_atomic();
+ atomic_dec(&obj->ref_count);
+
+ 이 코드는 객체의 업데이트된 death 마크가 레퍼런스 카운터 감소 동작
+ *전에* 보일 것을 보장합니다.
+
+ 더 많은 정보를 위해선 Documentation/atomic_ops.txt 문서를 참고하세요.
+ 어디서 이것들을 사용해야 할지 궁금하다면 "어토믹 오퍼레이션" 서브섹션을
+ 참고하세요.
+
+
+ (*) lockless_dereference();
+
+ 이 함수는 smp_read_barrier_depends() 데이터 의존성 배리어를 사용하는
+ 포인터 읽어오기 래퍼(wrapper) 함수로 생각될 수 있습니다.
+
+ 객체의 라이프타임이 RCU 외의 메커니즘으로 관리된다는 점을 제외하면
+ rcu_dereference() 와도 유사한데, 예를 들면 객체가 시스템이 꺼질 때에만
+ 제거되는 경우 등입니다. 또한, lockless_dereference() 은 RCU 와 함께
+ 사용될수도, RCU 없이 사용될 수도 있는 일부 데이터 구조에 사용되고
+ 있습니다.
+
+
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+ 이것들은 CPU 와 DMA 가능한 디바이스에서 모두 액세스 가능한 공유 메모리의
+ 읽기, 쓰기 작업들의 순서를 보장하기 위해 consistent memory 에서 사용하기
+ 위한 것들입니다.
+
+ 예를 들어, 디바이스와 메모리를 공유하며, 디스크립터 상태 값을 사용해
+ 디스크립터가 디바이스에 속해 있는지 아니면 CPU 에 속해 있는지 표시하고,
+ 공지용 초인종(doorbell) 을 사용해 업데이트된 디스크립터가 디바이스에 사용
+ 가능해졌음을 공지하는 디바이스 드라이버를 생각해 봅시다:
+
+ if (desc->status != DEVICE_OWN) {
+ /* 디스크립터를 소유하기 전에는 데이터를 읽지 않음 */
+ dma_rmb();
+
+ /* 데이터를 읽고 씀 */
+ read_data = desc->data;
+ desc->data = write_data;
+
+ /* 상태 업데이트 전 수정사항을 반영 */
+ dma_wmb();
+
+ /* 소유권을 수정 */
+ desc->status = DEVICE_OWN;
+
+ /* MMIO 를 통해 디바이스에 공지를 하기 전에 메모리를 동기화 */
+ wmb();
+
+ /* 업데이트된 디스크립터의 디바이스에 공지 */
+ writel(DESC_NOTIFY, doorbell);
+ }
+
+ dma_rmb() 는 디스크립터로부터 데이터를 읽어오기 전에 디바이스가 소유권을
+ 내놓았음을 보장하게 하고, dma_wmb() 는 디바이스가 자신이 소유권을 다시
+ 가졌음을 보기 전에 디스크립터에 데이터가 쓰였음을 보장합니다. wmb() 는
+ 캐시 일관성이 없는 (cache incoherent) MMIO 영역에 쓰기를 시도하기 전에
+ 캐시 일관성이 있는 메모리 (cache coherent memory) 쓰기가 완료되었음을
+ 보장해주기 위해 필요합니다.
+
+ consistent memory 에 대한 자세한 내용을 위해선 Documentation/DMA-API.txt
+ 문서를 참고하세요.
+
+
+MMIO 쓰기 배리어
+----------------
+
+리눅스 커널은 또한 memory-mapped I/O 쓰기를 위한 특별한 배리어도 가지고
+있습니다:
+
+ mmiowb();
+
+이것은 mandatory 쓰기 배리어의 변종으로, 완화된 순서 규칙의 I/O 영역에으로의
+쓰기가 부분적으로 순서를 맞추도록 해줍니다. 이 함수는 CPU->하드웨어 사이를
+넘어서 실제 하드웨어에까지 일부 수준의 영향을 끼칩니다.
+
+더 많은 정보를 위해선 "Acquire vs I/O 액세스" 서브섹션을 참고하세요.
+
+
+=========================
+암묵적 커널 메모리 배리어
+=========================
+
+리눅스 커널의 일부 함수들은 메모리 배리어를 내장하고 있는데, 락(lock)과
+스케쥴링 관련 함수들이 대부분입니다.
+
+여기선 _최소한의_ 보장을 설명합니다; 특정 아키텍쳐에서는 이 설명보다 더 많은
+보장을 제공할 수도 있습니다만 해당 아키텍쳐에 종속적인 코드 외의 부분에서는
+그런 보장을 기대해선 안될겁니다.
+
+
+락 ACQUISITION 함수
+-------------------
+
+리눅스 커널은 다양한 락 구성체를 가지고 있습니다:
+
+ (*) 스핀 락
+ (*) R/W 스핀 락
+ (*) 뮤텍스
+ (*) 세마포어
+ (*) R/W 세마포어
+
+각 구성체마다 모든 경우에 "ACQUIRE" 오퍼레이션과 "RELEASE" 오퍼레이션의 변종이
+존재합니다. 이 오퍼레이션들은 모두 적절한 배리어를 내포하고 있습니다:
+
+ (1) ACQUIRE 오퍼레이션의 영향:
+
+ ACQUIRE 뒤에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된
+ 뒤에 완료됩니다.
+
+ ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에
+ 완료될 수 있습니다. smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는
+ 코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서
+ 맞춥니다. 이건 smp_mb() 보다 완화된 것임을 기억하세요! 많은 아키텍쳐에서
+ smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다.
+
+ (2) RELEASE 오퍼레이션의 영향:
+
+ RELEASE 앞에서 요청된 메모리 오퍼레이션은 RELEASE 오퍼레이션이 완료되기
+ 전에 완료됩니다.
+
+ RELEASE 뒤에서 요청된 메모리 오퍼레이션은 RELEASE 오퍼레이션 완료 전에
+ 완료될 수 있습니다.
+
+ (3) ACQUIRE vs ACQUIRE 영향:
+
+ 어떤 ACQUIRE 오퍼레이션보다 앞에서 요청된 모든 ACQUIRE 오퍼레이션은 그
+ ACQUIRE 오퍼레이션 전에 완료됩니다.
+
+ (4) ACQUIRE vs RELEASE implication:
+
+ 어떤 RELEASE 오퍼레이션보다 앞서 요청된 ACQUIRE 오퍼레이션은 그 RELEASE
+ 오퍼레이션보다 먼저 완료됩니다.
+
+ (5) 실패한 조건적 ACQUIRE 영향:
+
+ ACQUIRE 오퍼레이션의 일부 락(lock) 변종은 락이 곧바로 획득하기에는
+ 불가능한 상태이거나 락이 획득 가능해지도록 기다리는 도중 시그널을 받거나
+ 해서 실패할 수 있습니다. 실패한 락은 어떤 배리어도 내포하지 않습니다.
+
+[!] 참고: 락 ACQUIRE 와 RELEASE 가 단방향 배리어여서 나타나는 현상 중 하나는
+크리티컬 섹션 바깥의 인스트럭션의 영향이 크리티컬 섹션 내부로도 들어올 수
+있다는 것입니다.
+
+RELEASE 후에 요청되는 ACQUIRE 는 전체 메모리 배리어라 여겨지면 안되는데,
+ACQUIRE 앞의 액세스가 ACQUIRE 후에 수행될 수 있고, RELEASE 후의 액세스가
+RELEASE 전에 수행될 수도 있으며, 그 두개의 액세스가 서로를 지나칠 수도 있기
+때문입니다:
+
+ *A = a;
+ ACQUIRE M
+ RELEASE M
+ *B = b;
+
+는 다음과 같이 될 수도 있습니다:
+
+ ACQUIRE M, STORE *B, STORE *A, RELEASE M
+
+ACQUIRE 와 RELEASE 가 락 획득과 해제라면, 그리고 락의 ACQUIRE 와 RELEASE 가
+같은 락 변수에 대한 것이라면, 해당 락을 쥐고 있지 않은 다른 CPU 의 시야에는
+이와 같은 재배치가 일어나는 것으로 보일 수 있습니다. 요약하자면, ACQUIRE 에
+이어 RELEASE 오퍼레이션을 순차적으로 실행하는 행위가 전체 메모리 배리어로
+생각되어선 -안됩니다-.
+
+비슷하게, 앞의 반대 케이스인 RELEASE 와 ACQUIRE 두개 오퍼레이션의 순차적 실행
+역시 전체 메모리 배리어를 내포하지 않습니다. 따라서, RELEASE, ACQUIRE 로
+규정되는 크리티컬 섹션의 CPU 수행은 RELEASE 와 ACQUIRE 를 가로지를 수 있으므로,
+다음과 같은 코드는:
+
+ *A = a;
+ RELEASE M
+ ACQUIRE N
+ *B = b;
+
+다음과 같이 수행될 수 있습니다:
+
+ ACQUIRE N, STORE *B, STORE *A, RELEASE M
+
+이런 재배치는 데드락을 일으킬 수도 있을 것처럼 보일 수 있습니다. 하지만, 그런
+데드락의 조짐이 있다면 RELEASE 는 단순히 완료될 것이므로 데드락은 존재할 수
+없습니다.
+
+ 이게 어떻게 올바른 동작을 할 수 있을까요?
+
+ 우리가 이야기 하고 있는건 재배치를 하는 CPU 에 대한 이야기이지,
+ 컴파일러에 대한 것이 아니란 점이 핵심입니다. 컴파일러 (또는, 개발자)
+ 가 오퍼레이션들을 이렇게 재배치하면, 데드락이 일어날 수 -있습-니다.
+
+ 하지만 CPU 가 오퍼레이션들을 재배치 했다는걸 생각해 보세요. 이 예에서,
+ 어셈블리 코드 상으로는 언락이 락을 앞서게 되어 있습니다. CPU 가 이를
+ 재배치해서 뒤의 락 오퍼레이션을 먼저 실행하게 됩니다. 만약 데드락이
+ 존재한다면, 이 락 오퍼레이션은 그저 스핀을 하며 계속해서 락을
+ 시도합니다 (또는, 한참 후에겠지만, 잠듭니다). CPU 는 언젠가는
+ (어셈블리 코드에서는 락을 앞서는) 언락 오퍼레이션을 실행하는데, 이 언락
+ 오퍼레이션이 잠재적 데드락을 해결하고, 락 오퍼레이션도 뒤이어 성공하게
+ 됩니다.
+
+ 하지만 만약 락이 잠을 자는 타입이었다면요? 그런 경우에 코드는
+ 스케쥴러로 들어가려 할 거고, 여기서 결국은 메모리 배리어를 만나게
+ 되는데, 이 메모리 배리어는 앞의 언락 오퍼레이션이 완료되도록 만들고,
+ 데드락은 이번에도 해결됩니다. 잠을 자는 행위와 언락 사이의 경주 상황
+ (race) 도 있을 수 있겠습니다만, 락 관련 기능들은 그런 경주 상황을 모든
+ 경우에 제대로 해결할 수 있어야 합니다.
+
+락과 세마포어는 UP 컴파일된 시스템에서의 순서에 대해 보장을 하지 않기 때문에,
+그런 상황에서 인터럽트 비활성화 오퍼레이션과 함께가 아니라면 어떤 일에도 - 특히
+I/O 액세스와 관련해서는 - 제대로 사용될 수 없을 겁니다.
+
+"CPU 간 ACQUIRING 배리어 효과" 섹션도 참고하시기 바랍니다.
+
+
+예를 들어, 다음과 같은 코드를 생각해 봅시다:
+
+ *A = a;
+ *B = b;
+ ACQUIRE
+ *C = c;
+ *D = d;
+ RELEASE
+ *E = e;
+ *F = f;
+
+여기선 다음의 이벤트 시퀀스가 생길 수 있습니다:
+
+ ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE
+
+ [+] {*F,*A} 는 조합된 액세스를 의미합니다.
+
+하지만 다음과 같은 건 불가능하죠:
+
+ {*F,*A}, *B, ACQUIRE, *C, *D, RELEASE, *E
+ *A, *B, *C, ACQUIRE, *D, RELEASE, *E, *F
+ *A, *B, ACQUIRE, *C, RELEASE, *D, *E, *F
+ *B, ACQUIRE, *C, *D, RELEASE, {*F,*A}, *E
+
+
+
+인터럽트 비활성화 함수
+----------------------
+
+인터럽트를 비활성화 하는 함수 (ACQUIRE 와 동일) 와 인터럽트를 활성화 하는 함수
+(RELEASE 와 동일) 는 컴파일러 배리어처럼만 동작합니다. 따라서, 별도의 메모리
+배리어나 I/O 배리어가 필요한 상황이라면 그 배리어들은 인터럽트 비활성화 함수
+외의 방법으로 제공되어야만 합니다.
+
+
+슬립과 웨이크업 함수
+--------------------
+
+글로벌 데이터에 표시된 이벤트에 의해 프로세스를 잠에 빠트리는 것과 깨우는 것은
+해당 이벤트를 기다리는 태스크의 태스크 상태와 그 이벤트를 알리기 위해 사용되는
+글로벌 데이터, 두 데이터간의 상호작용으로 볼 수 있습니다. 이것이 옳은 순서대로
+일어남을 분명히 하기 위해, 프로세스를 잠에 들게 하는 기능과 깨우는 기능은
+몇가지 배리어를 내포합니다.
+
+먼저, 잠을 재우는 쪽은 일반적으로 다음과 같은 이벤트 시퀀스를 따릅니다:
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (event_indicated)
+ break;
+ schedule();
+ }
+
+set_current_state() 에 의해, 태스크 상태가 바뀐 후 범용 메모리 배리어가
+자동으로 삽입됩니다:
+
+ CPU 1
+ ===============================
+ set_current_state();
+ smp_store_mb();
+ STORE current->state
+ <범용 배리어>
+ LOAD event_indicated
+
+set_current_state() 는 다음의 것들로 감싸질 수도 있습니다:
+
+ prepare_to_wait();
+ prepare_to_wait_exclusive();
+
+이것들 역시 상태를 설정한 후 범용 메모리 배리어를 삽입합니다.
+앞의 전체 시퀀스는 다음과 같은 함수들로 한번에 수행 가능한데, 이것들은 모두
+올바른 장소에 메모리 배리어를 삽입합니다:
+
+ wait_event();
+ wait_event_interruptible();
+ wait_event_interruptible_exclusive();
+ wait_event_interruptible_timeout();
+ wait_event_killable();
+ wait_event_timeout();
+ wait_on_bit();
+ wait_on_bit_lock();
+
+
+두번째로, 깨우기를 수행하는 코드는 일반적으로 다음과 같을 겁니다:
+
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+또는:
+
+ event_indicated = 1;
+ wake_up_process(event_daemon);
+
+wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를
+깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를
+알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에
+위치하게 됩니다.
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ set_current_state(); STORE event_indicated
+ smp_store_mb(); wake_up();
+ STORE current->state <쓰기 배리어>
+ <범용 배리어> STORE current->state
+ LOAD event_indicated
+
+한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만
+실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정
+하에 아래의 이벤트 시퀀스를 생각해 봅시다:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ X = 1; STORE event_indicated
+ smp_mb(); wake_up();
+ Y = 1; wait_event(wq, Y == 1);
+ wake_up(); load from Y sees 1, no memory barrier
+ load from X might see 0
+
+위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을
+본다고 보장될 수 있을 겁니다.
+
+사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다:
+
+ complete();
+ wake_up();
+ wake_up_all();
+ wake_up_bit();
+ wake_up_interruptible();
+ wake_up_interruptible_all();
+ wake_up_interruptible_nr();
+ wake_up_interruptible_poll();
+ wake_up_interruptible_sync();
+ wake_up_interruptible_sync_poll();
+ wake_up_locked();
+ wake_up_locked_poll();
+ wake_up_nr();
+ wake_up_poll();
+ wake_up_process();
+
+
+[!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에
+이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는
+로드에 대해 순서를 맞추지 _않는다는_ 점을 기억하세요. 예를 들어, 잠재우는
+코드가 다음과 같고:
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (event_indicated)
+ break;
+ __set_current_state(TASK_RUNNING);
+ do_something(my_data);
+
+깨우는 코드는 다음과 같다면:
+
+ my_data = value;
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+event_indecated 에의 변경이 잠재우는 코드에게 my_data 에의 변경 후에 이루어진
+것으로 인지될 것이라는 보장이 없습니다. 이런 경우에는 양쪽 코드 모두 각각의
+데이터 액세스 사이에 메모리 배리어를 직접 쳐야 합니다. 따라서 앞의 재우는
+코드는 다음과 같이:
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (event_indicated) {
+ smp_rmb();
+ do_something(my_data);
+ }
+
+그리고 깨우는 코드는 다음과 같이 되어야 합니다:
+
+ my_data = value;
+ smp_wmb();
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+
+그외의 함수들
+-------------
+
+그외의 배리어를 내포하는 함수들은 다음과 같습니다:
+
+ (*) schedule() 과 그 유사한 것들이 완전한 메모리 배리어를 내포합니다.
+
+
+==============================
+CPU 간 ACQUIRING 배리어의 효과
+==============================
+
+SMP 시스템에서의 락 기능들은 더욱 강력한 형태의 배리어를 제공합니다: 이
+배리어는 동일한 락을 사용하는 다른 CPU 들의 메모리 액세스 순서에도 영향을
+끼칩니다.
+
+
+ACQUIRE VS 메모리 액세스
+------------------------
+
+다음의 예를 생각해 봅시다: 시스템은 두개의 스핀락 (M) 과 (Q), 그리고 세개의 CPU
+를 가지고 있습니다; 여기에 다음의 이벤트 시퀀스가 발생합니다:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ WRITE_ONCE(*A, a); WRITE_ONCE(*E, e);
+ ACQUIRE M ACQUIRE Q
+ WRITE_ONCE(*B, b); WRITE_ONCE(*F, f);
+ WRITE_ONCE(*C, c); WRITE_ONCE(*G, g);
+ RELEASE M RELEASE Q
+ WRITE_ONCE(*D, d); WRITE_ONCE(*H, h);
+
+*A 로의 액세스부터 *H 로의 액세스까지가 어떤 순서로 CPU 3 에게 보여질지에
+대해서는 각 CPU 에서의 락 사용에 의해 내포되어 있는 제약을 제외하고는 어떤
+보장도 존재하지 않습니다. 예를 들어, CPU 3 에게 다음과 같은 순서로 보여지는
+것이 가능합니다:
+
+ *E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
+
+하지만 다음과 같이 보이지는 않을 겁니다:
+
+ *B, *C or *D preceding ACQUIRE M
+ *A, *B or *C following RELEASE M
+ *F, *G or *H preceding ACQUIRE Q
+ *E, *F or *G following RELEASE Q
+
+
+
+ACQUIRE VS I/O 액세스
+----------------------
+
+특정한 (특히 NUMA 가 관련된) 환경 하에서 두개의 CPU 에서 동일한 스핀락으로
+보호되는 두개의 크리티컬 섹션 안의 I/O 액세스는 PCI 브릿지에 겹쳐진 I/O
+액세스로 보일 수 있는데, PCI 브릿지는 캐시 일관성 프로토콜과 합을 맞춰야 할
+의무가 없으므로, 필요한 읽기 메모리 배리어가 요청되지 않기 때문입니다.
+
+예를 들어서:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ spin_unlock(Q);
+
+는 PCI 브릿지에 다음과 같이 보일 수 있습니다:
+
+ STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
+
+이렇게 되면 하드웨어의 오동작을 일으킬 수 있습니다.
+
+
+이런 경우엔 잡아둔 스핀락을 내려놓기 전에 mmiowb() 를 수행해야 하는데, 예를
+들면 다음과 같습니다:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ mmiowb();
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ mmiowb();
+ spin_unlock(Q);
+
+이 코드는 CPU 1 에서 요청된 두개의 스토어가 PCI 브릿지에 CPU 2 에서 요청된
+스토어들보다 먼저 보여짐을 보장합니다.
+
+
+또한, 같은 디바이스에서 스토어를 이어 로드가 수행되면 이 로드는 로드가 수행되기
+전에 스토어가 완료되기를 강제하므로 mmiowb() 의 필요가 없어집니다:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ a = readl(DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ b = readl(DATA);
+ spin_unlock(Q);
+
+
+더 많은 정보를 위해선 Documenataion/DocBook/deviceiobook.tmpl 을 참고하세요.
+
+
+=========================
+메모리 배리어가 필요한 곳
+=========================
+
+설령 SMP 커널을 사용하더라도 싱글 쓰레드로 동작하는 코드는 올바르게 동작하는
+것으로 보여질 것이기 때문에, 평범한 시스템 운영중에 메모리 오퍼레이션 재배치는
+일반적으로 문제가 되지 않습니다. 하지만, 재배치가 문제가 _될 수 있는_ 네가지
+환경이 있습니다:
+
+ (*) 프로세서간 상호 작용.
+
+ (*) 어토믹 오퍼레이션.
+
+ (*) 디바이스 액세스.
+
+ (*) 인터럽트.
+
+
+프로세서간 상호 작용
+--------------------
+
+두개 이상의 프로세서를 가진 시스템이 있다면, 시스템의 두개 이상의 CPU 는 동시에
+같은 데이터에 대한 작업을 할 수 있습니다. 이는 동기화 문제를 일으킬 수 있고,
+이 문제를 해결하는 일반적 방법은 락을 사용하는 것입니다. 하지만, 락은 상당히
+비용이 비싸서 가능하면 락을 사용하지 않고 일을 처리하는 것이 낫습니다. 이런
+경우, 두 CPU 모두에 영향을 끼치는 오퍼레이션들은 오동작을 막기 위해 신중하게
+순서가 맞춰져야 합니다.
+
+예를 들어, R/W 세마포어의 느린 수행경로 (slow path) 를 생각해 봅시다.
+세마포어를 위해 대기를 하는 하나의 프로세스가 자신의 스택 중 일부를 이
+세마포어의 대기 프로세스 리스트에 링크한 채로 있습니다:
+
+ struct rw_semaphore {
+ ...
+ spinlock_t lock;
+ struct list_head waiters;
+ };
+
+ struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ };
+
+특정 대기 상태 프로세스를 깨우기 위해, up_read() 나 up_write() 함수는 다음과
+같은 일을 합니다:
+
+ (1) 다음 대기 상태 프로세스 레코드는 어디있는지 알기 위해 이 대기 상태
+ 프로세스 레코드의 next 포인터를 읽습니다;
+
+ (2) 이 대기 상태 프로세스의 task 구조체로의 포인터를 읽습니다;
+
+ (3) 이 대기 상태 프로세스가 세마포어를 획득했음을 알리기 위해 task
+ 포인터를 초기화 합니다;
+
+ (4) 해당 태스크에 대해 wake_up_process() 를 호출합니다; 그리고
+
+ (5) 해당 대기 상태 프로세스의 task 구조체를 잡고 있던 레퍼런스를 해제합니다.
+
+달리 말하자면, 다음 이벤트 시퀀스를 수행해야 합니다:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+그리고 이 이벤트들이 다른 순서로 수행된다면, 오동작이 일어날 수 있습니다.
+
+한번 세마포어의 대기줄에 들어갔고 세마포어 락을 놓았다면, 해당 대기 프로세스는
+락을 다시는 잡지 않습니다; 대신 자신의 task 포인터가 초기화 되길 기다립니다.
+그 레코드는 대기 프로세스의 스택에 있기 때문에, 리스트의 next 포인터가 읽혀지기
+_전에_ task 포인터가 지워진다면, 다른 CPU 는 해당 대기 프로세스를 시작해 버리고
+up*() 함수가 next 포인터를 읽기 전에 대기 프로세스의 스택을 마구 건드릴 수
+있습니다.
+
+그렇게 되면 위의 이벤트 시퀀스에 어떤 일이 일어나는지 생각해 보죠:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ down_xxx()
+ Queue waiter
+ Sleep
+ up_yyy()
+ LOAD waiter->task;
+ STORE waiter->task;
+ Woken up by other event
+ <preempt>
+ Resume processing
+ down_xxx() returns
+ call foo()
+ foo() clobbers *waiter
+ </preempt>
+ LOAD waiter->list.next;
+ --- OOPS ---
+
+이 문제는 세마포어 락의 사용으로 해결될 수도 있겠지만, 그렇게 되면 깨어난 후에
+down_xxx() 함수가 불필요하게 스핀락을 또다시 얻어야만 합니다.
+
+이 문제를 해결하는 방법은 범용 SMP 메모리 배리어를 추가하는 겁니다:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ smp_mb();
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+이 경우에, 배리어는 시스템의 나머지 CPU 들에게 모든 배리어 앞의 메모리 액세스가
+배리어 뒤의 메모리 액세스보다 앞서 일어난 것으로 보이게 만듭니다. 배리어 앞의
+메모리 액세스들이 배리어 명령 자체가 완료되는 시점까지 완료된다고는 보장하지
+_않습니다_.
+
+(이게 문제가 되지 않을) 단일 프로세서 시스템에서 smp_mb() 는 실제로는 그저
+컴파일러가 CPU 안에서의 순서를 바꾸거나 하지 않고 주어진 순서대로 명령을
+내리도록 하는 컴파일러 배리어일 뿐입니다. 오직 하나의 CPU 만 있으니, CPU 의
+의존성 순서 로직이 그 외의 모든것을 알아서 처리할 겁니다.
+
+
+어토믹 오퍼레이션
+-----------------
+
+어토믹 오퍼레이션은 기술적으로 프로세서간 상호작용으로 분류되며 그 중 일부는
+전체 메모리 배리어를 내포하고 또 일부는 내포하지 않지만, 커널에서 상당히
+의존적으로 사용하는 기능 중 하나입니다.
+
+메모리의 어떤 상태를 수정하고 해당 상태에 대한 (예전의 또는 최신의) 정보를
+리턴하는 어토믹 오퍼레이션은 모두 SMP-조건적 범용 메모리 배리어(smp_mb())를
+실제 오퍼레이션의 앞과 뒤에 내포합니다. 이런 오퍼레이션은 다음의 것들을
+포함합니다:
+
+ xchg();
+ atomic_xchg(); atomic_long_xchg();
+ atomic_inc_return(); atomic_long_inc_return();
+ atomic_dec_return(); atomic_long_dec_return();
+ atomic_add_return(); atomic_long_add_return();
+ atomic_sub_return(); atomic_long_sub_return();
+ atomic_inc_and_test(); atomic_long_inc_and_test();
+ atomic_dec_and_test(); atomic_long_dec_and_test();
+ atomic_sub_and_test(); atomic_long_sub_and_test();
+ atomic_add_negative(); atomic_long_add_negative();
+ test_and_set_bit();
+ test_and_clear_bit();
+ test_and_change_bit();
+
+ /* exchange 조건이 성공할 때 */
+ cmpxchg();
+ atomic_cmpxchg(); atomic_long_cmpxchg();
+ atomic_add_unless(); atomic_long_add_unless();
+
+이것들은 메모리 배리어 효과가 필요한 ACQUIRE 부류와 RELEASE 부류 오퍼레이션들을
+구현할 때, 그리고 객체 해제를 위해 레퍼런스 카운터를 조정할 때, 암묵적 메모리
+배리어 효과가 필요한 곳 등에 사용됩니다.
+
+
+다음의 오퍼레이션들은 메모리 배리어를 내포하지 _않기_ 때문에 문제가 될 수
+있지만, RELEASE 부류의 오퍼레이션들과 같은 것들을 구현할 때 사용될 수도
+있습니다:
+
+ atomic_set();
+ set_bit();
+ clear_bit();
+ change_bit();
+
+이것들을 사용할 때에는 필요하다면 적절한 (예를 들면 smp_mb__before_atomic()
+같은) 메모리 배리어가 명시적으로 함께 사용되어야 합니다.
+
+
+아래의 것들도 메모리 배리어를 내포하지 _않기_ 때문에, 일부 환경에서는 (예를
+들면 smp_mb__before_atomic() 과 같은) 명시적인 메모리 배리어 사용이 필요합니다.
+
+ atomic_add();
+ atomic_sub();
+ atomic_inc();
+ atomic_dec();
+
+이것들이 통계 생성을 위해 사용된다면, 그리고 통계 데이터 사이에 관계가 존재하지
+않는다면 메모리 배리어는 필요치 않을 겁니다.
+
+객체의 수명을 관리하기 위해 레퍼런스 카운팅 목적으로 사용된다면, 레퍼런스
+카운터는 락으로 보호되는 섹션에서만 조정되거나 호출하는 쪽이 이미 충분한
+레퍼런스를 잡고 있을 것이기 때문에 메모리 배리어는 아마 필요 없을 겁니다.
+
+만약 어떤 락을 구성하기 위해 사용된다면, 락 관련 동작은 일반적으로 작업을 특정
+순서대로 진행해야 하므로 메모리 배리어가 필요할 수 있습니다.
+
+기본적으로, 각 사용처에서는 메모리 배리어가 필요한지 아닌지 충분히 고려해야
+합니다.
+
+아래의 오퍼레이션들은 특별한 락 관련 동작들입니다:
+
+ test_and_set_bit_lock();
+ clear_bit_unlock();
+ __clear_bit_unlock();
+
+이것들은 ACQUIRE 류와 RELEASE 류의 오퍼레이션들을 구현합니다. 락 관련 도구를
+구현할 때에는 이것들을 좀 더 선호하는 편이 나은데, 이것들의 구현은 많은
+아키텍쳐에서 최적화 될 수 있기 때문입니다.
+
+[!] 이런 상황에 사용할 수 있는 특수한 메모리 배리어 도구들이 있습니다만, 일부
+CPU 에서는 사용되는 어토믹 인스트럭션 자체에 메모리 배리어가 내포되어 있어서
+어토믹 오퍼레이션과 메모리 배리어를 함께 사용하는 게 불필요한 일이 될 수
+있는데, 그런 경우에 이 특수 메모리 배리어 도구들은 no-op 이 되어 실질적으로
+아무일도 하지 않습니다.
+
+더 많은 내용을 위해선 Documentation/atomic_ops.txt 를 참고하세요.
+
+
+디바이스 액세스
+---------------
+
+많은 디바이스가 메모리 매핑 기법으로 제어될 수 있는데, 그렇게 제어되는
+디바이스는 CPU 에는 단지 특정 메모리 영역의 집합처럼 보이게 됩니다. 드라이버는
+그런 디바이스를 제어하기 위해 정확히 올바른 순서로 올바른 메모리 액세스를
+만들어야 합니다.
+
+하지만, 액세스들을 재배치 하거나 조합하거나 병합하는게 더 효율적이라 판단하는
+영리한 CPU 나 컴파일러들을 사용하면 드라이버 코드의 조심스럽게 순서 맞춰진
+액세스들이 디바이스에는 요청된 순서대로 도착하지 못하게 할 수 있는 - 디바이스가
+오동작을 하게 할 - 잠재적 문제가 생길 수 있습니다.
+
+리눅스 커널 내부에서, I/O 는 어떻게 액세스들을 적절히 순차적이게 만들 수 있는지
+알고 있는, - inb() 나 writel() 과 같은 - 적절한 액세스 루틴을 통해 이루어져야만
+합니다. 이것들은 대부분의 경우에는 명시적 메모리 배리어 와 함께 사용될 필요가
+없습니다만, 다음의 두가지 상황에서는 명시적 메모리 배리어가 필요할 수 있습니다:
+
+ (1) 일부 시스템에서 I/O 스토어는 모든 CPU 에 일관되게 순서 맞춰지지 않는데,
+ 따라서 _모든_ 일반적인 드라이버들에 락이 사용되어야만 하고 이 크리티컬
+ 섹션을 빠져나오기 전에 mmiowb() 가 꼭 호출되어야 합니다.
+
+ (2) 만약 액세스 함수들이 완화된 메모리 액세스 속성을 갖는 I/O 메모리 윈도우를
+ 사용한다면, 순서를 강제하기 위해선 _mandatory_ 메모리 배리어가 필요합니다.
+
+더 많은 정보를 위해선 Documentation/DocBook/deviceiobook.tmpl 을 참고하십시오.
+
+
+인터럽트
+--------
+
+드라이버는 자신의 인터럽트 서비스 루틴에 의해 인터럽트 당할 수 있기 때문에
+드라이버의 이 두 부분은 서로의 디바이스 제어 또는 액세스 부분과 상호 간섭할 수
+있습니다.
+
+스스로에게 인터럽트 당하는 걸 불가능하게 하고, 드라이버의 크리티컬한
+오퍼레이션들을 모두 인터럽트가 불가능하게 된 영역에 집어넣거나 하는 방법 (락의
+한 형태) 으로 이런 상호 간섭을 - 최소한 부분적으로라도 - 줄일 수 있습니다.
+드라이버의 인터럽트 루틴이 실행 중인 동안, 해당 드라이버의 코어는 같은 CPU 에서
+수행되지 않을 것이며, 현재의 인터럽트가 처리되는 중에는 또다시 인터럽트가
+일어나지 못하도록 되어 있으니 인터럽트 핸들러는 그에 대해서는 락을 잡지 않아도
+됩니다.
+
+하지만, 어드레스 레지스터와 데이터 레지스터를 갖는 이더넷 카드를 다루는
+드라이버를 생각해 봅시다. 만약 이 드라이버의 코어가 인터럽트를 비활성화시킨
+채로 이더넷 카드와 대화하고 드라이버의 인터럽트 핸들러가 호출되었다면:
+
+ LOCAL IRQ DISABLE
+ writew(ADDR, 3);
+ writew(DATA, y);
+ LOCAL IRQ ENABLE
+ <interrupt>
+ writew(ADDR, 4);
+ q = readw(DATA);
+ </interrupt>
+
+만약 순서 규칙이 충분히 완화되어 있다면 데이터 레지스터에의 스토어는 어드레스
+레지스터에 두번째로 행해지는 스토어 뒤에 일어날 수도 있습니다:
+
+ STORE *ADDR = 3, STORE *ADDR = 4, STORE *DATA = y, q = LOAD *DATA
+
+
+만약 순서 규칙이 충분히 완화되어 있고 묵시적으로든 명시적으로든 배리어가
+사용되지 않았다면 인터럽트 비활성화 섹션에서 일어난 액세스가 바깥으로 새어서
+인터럽트 내에서 일어난 액세스와 섞일 수 있다고 - 그리고 그 반대도 - 가정해야만
+합니다.
+
+그런 영역 안에서 일어나는 I/O 액세스들은 엄격한 순서 규칙의 I/O 레지스터에
+묵시적 I/O 배리어를 형성하는 동기적 (synchronous) 로드 오퍼레이션을 포함하기
+때문에 일반적으로는 이런게 문제가 되지 않습니다. 만약 이걸로는 충분치 않다면
+mmiowb() 가 명시적으로 사용될 필요가 있습니다.
+
+
+하나의 인터럽트 루틴과 별도의 CPU 에서 수행중이며 서로 통신을 하는 두 루틴
+사이에도 비슷한 상황이 일어날 수 있습니다. 만약 그런 경우가 발생할 가능성이
+있다면, 순서를 보장하기 위해 인터럽트 비활성화 락이 사용되어져야만 합니다.
+
+
+======================
+커널 I/O 배리어의 효과
+======================
+
+I/O 메모리에 액세스할 때, 드라이버는 적절한 액세스 함수를 사용해야 합니다:
+
+ (*) inX(), outX():
+
+ 이것들은 메모리 공간보다는 I/O 공간에 이야기를 하려는 의도로
+ 만들어졌습니다만, 그건 기본적으로 CPU 마다 다른 컨셉입니다. i386 과
+ x86_64 프로세서들은 특별한 I/O 공간 액세스 사이클과 명령어를 실제로 가지고
+ 있지만, 다른 많은 CPU 들에는 그런 컨셉이 존재하지 않습니다.
+
+ 다른 것들 중에서도 PCI 버스가 I/O 공간 컨셉을 정의하는데, 이는 - i386 과
+ x86_64 같은 CPU 에서 - CPU 의 I/O 공간 컨셉으로 쉽게 매치됩니다. 하지만,
+ 대체할 I/O 공간이 없는 CPU 에서는 CPU 의 메모리 맵의 가상 I/O 공간으로
+ 매핑될 수도 있습니다.
+
+ 이 공간으로의 액세스는 (i386 등에서는) 완전하게 동기화 됩니다만, 중간의
+ (PCI 호스트 브리지와 같은) 브리지들은 이를 완전히 보장하진 않을수도
+ 있습니다.
+
+ 이것들의 상호간의 순서는 완전하게 보장됩니다.
+
+ 다른 타입의 메모리 오퍼레이션, I/O 오퍼레이션에 대한 순서는 완전하게
+ 보장되지는 않습니다.
+
+ (*) readX(), writeX():
+
+ 이것들이 수행 요청되는 CPU 에서 서로에게 완전히 순서가 맞춰지고 독립적으로
+ 수행되는지에 대한 보장 여부는 이들이 액세스 하는 메모리 윈도우에 정의된
+ 특성에 의해 결정됩니다. 예를 들어, 최신의 i386 아키텍쳐 머신에서는 MTRR
+ 레지스터로 이 특성이 조정됩니다.
+
+ 일반적으로는, 프리페치 (prefetch) 가능한 디바이스를 액세스 하는게
+ 아니라면, 이것들은 완전히 순서가 맞춰지고 결합되지 않게 보장될 겁니다.
+
+ 하지만, (PCI 브리지와 같은) 중간의 하드웨어는 자신이 원한다면 집행을
+ 연기시킬 수 있습니다; 스토어 명령을 실제로 하드웨어로 내려보내기(flush)
+ 위해서는 같은 위치로부터 로드를 하는 방법이 있습니다만[*], PCI 의 경우는
+ 같은 디바이스나 환경 구성 영역에서의 로드만으로도 충분할 겁니다.
+
+ [*] 주의! 쓰여진 것과 같은 위치로부터의 로드를 시도하는 것은 오동작을
+ 일으킬 수도 있습니다 - 예로 16650 Rx/Tx 시리얼 레지스터를 생각해
+ 보세요.
+
+ 프리페치 가능한 I/O 메모리가 사용되면, 스토어 명령들이 순서를 지키도록
+ 하기 위해 mmiowb() 배리어가 필요할 수 있습니다.
+
+ PCI 트랜잭션 사이의 상호작용에 대해 더 많은 정보를 위해선 PCI 명세서를
+ 참고하시기 바랍니다.
+
+ (*) readX_relaxed(), writeX_relaxed()
+
+ 이것들은 readX() 와 writeX() 랑 비슷하지만, 더 완화된 메모리 순서 보장을
+ 제공합니다. 구체적으로, 이것들은 일반적 메모리 액세스 (예: DMA 버퍼) 에도
+ LOCK 이나 UNLOCK 오퍼레이션들에도 순서를 보장하지 않습니다. LOCK 이나
+ UNLOCK 오퍼레이션들에 맞춰지는 순서가 필요하다면, mmiowb() 배리어가 사용될
+ 수 있습니다. 같은 주변 장치에의 완화된 액세스끼리는 순서가 지켜짐을 알아
+ 두시기 바랍니다.
+
+ (*) ioreadX(), iowriteX()
+
+ 이것들은 inX()/outX() 나 readX()/writeX() 처럼 실제로 수행하는 액세스의
+ 종류에 따라 적절하게 수행될 것입니다.
+
+
+===================================
+가정되는 가장 완화된 실행 순서 모델
+===================================
+
+컨셉적으로 CPU 는 주어진 프로그램에 대해 프로그램 그 자체에는 인과성 (program
+causality) 을 지키는 것처럼 보이게 하지만 일반적으로는 순서를 거의 지켜주지
+않는다고 가정되어야만 합니다. (i386 이나 x86_64 같은) 일부 CPU 들은 코드
+재배치에 (powerpc 나 frv 와 같은) 다른 것들에 비해 강한 제약을 갖지만, 아키텍쳐
+종속적 코드 이외의 코드에서는 순서에 대한 제약이 가장 완화된 경우 (DEC Alpha)
+를 가정해야 합니다.
+
+이 말은, CPU 에게 주어지는 인스트럭션 스트림 내의 한 인스트럭션이 앞의
+인스트럭션에 종속적이라면 앞의 인스트럭션은 뒤의 종속적 인스트럭션이 실행되기
+전에 완료[*]될 수 있어야 한다는 제약 (달리 말해서, 인과성이 지켜지는 것으로
+보이게 함) 외에는 자신이 원하는 순서대로 - 심지어 병렬적으로도 - 그 스트림을
+실행할 수 있음을 의미합니다
+
+ [*] 일부 인스트럭션은 하나 이상의 영향 - 조건 코드를 바꾼다던지, 레지스터나
+ 메모리를 바꾼다던지 - 을 만들어내며, 다른 인스트럭션은 다른 효과에
+ 종속적일 수 있습니다.
+
+CPU 는 최종적으로 아무 효과도 만들지 않는 인스트럭션 시퀀스는 없애버릴 수도
+있습니다. 예를 들어, 만약 두개의 연속되는 인스트럭션이 둘 다 같은 레지스터에
+직접적인 값 (immediate value) 을 집어넣는다면, 첫번째 인스트럭션은 버려질 수도
+있습니다.
+
+
+비슷하게, 컴파일러 역시 프로그램의 인과성만 지켜준다면 인스트럭션 스트림을
+자신이 보기에 올바르다 생각되는대로 재배치 할 수 있습니다.
+
+
+===============
+CPU 캐시의 영향
+===============
+
+캐시된 메모리 오퍼레이션들이 시스템 전체에 어떻게 인지되는지는 CPU 와 메모리
+사이에 존재하는 캐시들, 그리고 시스템 상태의 일관성을 관리하는 메모리 일관성
+시스템에 상당 부분 영향을 받습니다.
+
+한 CPU 가 시스템의 다른 부분들과 캐시를 통해 상호작용한다면, 메모리 시스템은
+CPU 의 캐시들을 포함해야 하며, CPU 와 CPU 자신의 캐시 사이에서의 동작을 위한
+메모리 배리어를 가져야 합니다. (메모리 배리어는 논리적으로는 다음 그림의
+점선에서 동작합니다):
+
+ <--- CPU ---> : <----------- Memory ----------->
+ :
+ +--------+ +--------+ : +--------+ +-----------+
+ | | | | : | | | | +--------+
+ | CPU | | Memory | : | CPU | | | | |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | |--->| Memory |
+ | | | | : | | | | | |
+ +--------+ +--------+ : +--------+ | | | |
+ : | Cache | +--------+
+ : | Coherency |
+ : | Mechanism | +--------+
+ +--------+ +--------+ : +--------+ | | | |
+ | | | | : | | | | | |
+ | CPU | | Memory | : | CPU | | |--->| Device |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | | | |
+ | | | | : | | | | +--------+
+ +--------+ +--------+ : +--------+ +-----------+
+ :
+ :
+
+특정 로드나 스토어는 해당 오퍼레이션을 요청한 CPU 의 캐시 내에서 동작을 완료할
+수도 있기 때문에 해당 CPU 의 바깥에는 보이지 않을 수 있지만, 다른 CPU 가 관심을
+갖는다면 캐시 일관성 메커니즘이 해당 캐시라인을 해당 CPU 에게 전달하고, 해당
+메모리 영역에 대한 오퍼레이션이 발생할 때마다 그 영향을 전파시키기 때문에, 해당
+오퍼레이션은 메모리에 실제로 액세스를 한것처럼 나타날 것입니다.
+
+CPU 코어는 프로그램의 인과성이 유지된다고만 여겨진다면 인스트럭션들을 어떤
+순서로든 재배치해서 수행할 수 있습니다. 일부 인스트럭션들은 로드나 스토어
+오퍼레이션을 만드는데 이 오퍼레이션들은 이후 수행될 메모리 액세스 큐에 들어가게
+됩니다. 코어는 이 오퍼레이션들을 해당 큐에 어떤 순서로든 원하는대로 넣을 수
+있고, 다른 인스트럭션의 완료를 기다리도록 강제되기 전까지는 수행을 계속합니다.
+
+메모리 배리어가 하는 일은 CPU 쪽에서 메모리 쪽으로 넘어가는 액세스들의 순서,
+그리고 그 액세스의 결과가 시스템의 다른 관찰자들에게 인지되는 순서를 제어하는
+것입니다.
+
+[!] CPU 들은 항상 그들 자신의 로드와 스토어는 프로그램 순서대로 일어난 것으로
+보기 때문에, 주어진 CPU 내에서는 메모리 배리어를 사용할 필요가 _없습니다_.
+
+[!] MMIO 나 다른 디바이스 액세스들은 캐시 시스템을 우회할 수도 있습니다. 우회
+여부는 디바이스가 액세스 되는 메모리 윈도우의 특성에 의해 결정될 수도 있고, CPU
+가 가지고 있을 수 있는 특수한 디바이스 통신 인스트럭션의 사용에 의해서 결정될
+수도 있습니다.
+
+
+캐시 일관성
+-----------
+
+하지만 삶은 앞에서 이야기한 것처럼 단순하지 않습니다: 캐시들은 일관적일 것으로
+기대되지만, 그 일관성이 순서에도 적용될 거라는 보장은 없습니다. 한 CPU 에서
+만들어진 변경 사항은 최종적으로는 시스템의 모든 CPU 에게 보여지게 되지만, 다른
+CPU 들에게도 같은 순서로 보이게 될 거라는 보장은 없다는 뜻입니다.
+
+
+두개의 CPU (1 & 2) 가 달려 있고, 각 CPU 에 두개의 데이터 캐시(CPU 1 은 A/B 를,
+CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을 다룬다고 생각해
+봅시다:
+
+ :
+ : +--------+
+ : +---------+ | |
+ +--------+ : +--->| Cache A |<------->| |
+ | | : | +---------+ | |
+ | CPU 1 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache B |<------->| |
+ : +---------+ | |
+ : | Memory |
+ : +---------+ | System |
+ +--------+ : +--->| Cache C |<------->| |
+ | | : | +---------+ | |
+ | CPU 2 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache D |<------->| |
+ : +---------+ | |
+ : +--------+
+ :
+
+이 시스템이 다음과 같은 특성을 갖는다 생각해 봅시다:
+
+ (*) 홀수번 캐시라인은 캐시 A, 캐시 C 또는 메모리에 위치할 수 있음;
+
+ (*) 짝수번 캐시라인은 캐시 B, 캐시 D 또는 메모리에 위치할 수 있음;
+
+ (*) CPU 코어가 한개의 캐시에 접근하는 동안, 다른 캐시는 - 더티 캐시라인을
+ 메모리에 내리거나 추측성 로드를 하거나 하기 위해 - 시스템의 다른 부분에
+ 액세스 하기 위해 버스를 사용할 수 있음;
+
+ (*) 각 캐시는 시스템의 나머지 부분들과 일관성을 맞추기 위해 해당 캐시에
+ 적용되어야 할 오퍼레이션들의 큐를 가짐;
+
+ (*) 이 일관성 큐는 캐시에 이미 존재하는 라인에 가해지는 평범한 로드에 의해서는
+ 비워지지 않는데, 큐의 오퍼레이션들이 이 로드의 결과에 영향을 끼칠 수 있다
+ 할지라도 그러함.
+
+이제, 첫번째 CPU 에서 두개의 쓰기 오퍼레이션을 만드는데, 해당 CPU 의 캐시에
+요청된 순서로 오퍼레이션이 도달됨을 보장하기 위해 두 오퍼레이션 사이에 쓰기
+배리어를 사용하는 상황을 상상해 봅시다:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb(); v 의 변경이 p 의 변경 전에 보일 것을
+ 분명히 함
+ <A:modify v=2> v 는 이제 캐시 A 에 독점적으로 존재함
+ p = &v;
+ <B:modify p=&v> p 는 이제 캐시 B 에 독점적으로 존재함
+
+여기서의 쓰기 메모리 배리어는 CPU 1 의 캐시가 올바른 순서로 업데이트 된 것으로
+시스템의 다른 CPU 들이 인지하게 만듭니다. 하지만, 이제 두번째 CPU 가 그 값들을
+읽으려 하는 상황을 생각해 봅시다:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ ...
+ q = p;
+ x = *q;
+
+위의 두개의 읽기 오퍼레이션은 예상된 순서로 일어나지 못할 수 있는데, 두번째 CPU
+의 한 캐시에 다른 캐시 이벤트가 발생해 v 를 담고 있는 캐시라인의 해당 캐시에의
+업데이트가 지연되는 사이, p 를 담고 있는 캐시라인은 두번째 CPU 의 다른 캐시에
+업데이트 되어버렸을 수 있기 때문입니다.
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &v; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ x = *q;
+ <C:read *q> 캐시에 업데이트 되기 전의 v 를 읽음
+ <C:unbusy>
+ <C:commit v=2>
+
+기본적으로, 두개의 캐시라인 모두 CPU 2 에 최종적으로는 업데이트 될 것이지만,
+별도의 개입 없이는, 업데이트의 순서가 CPU 1 에서 만들어진 순서와 동일할
+것이라는 보장이 없습니다.
+
+
+여기에 개입하기 위해선, 데이터 의존성 배리어나 읽기 배리어를 로드 오퍼레이션들
+사이에 넣어야 합니다. 이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성
+큐를 처리하도록 강제하게 됩니다.
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &v; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ smp_read_barrier_depends()
+ <C:unbusy>
+ <C:commit v=2>
+ x = *q;
+ <C:read *q> 캐시에 업데이트 된 v 를 읽음
+
+
+이런 부류의 문제는 DEC Alpha 계열 프로세서들에서 발견될 수 있는데, 이들은
+데이터 버스를 좀 더 잘 사용해 성능을 개선할 수 있는, 분할된 캐시를 가지고 있기
+때문입니다. 대부분의 CPU 는 하나의 읽기 오퍼레이션의 메모리 액세스가 다른 읽기
+오퍼레이션에 의존적이라면 데이터 의존성 배리어를 내포시킵니다만, 모두가 그런건
+아니기 때문에 이점에 의존해선 안됩니다.
+
+다른 CPU 들도 분할된 캐시를 가지고 있을 수 있지만, 그런 CPU 들은 평범한 메모리
+액세스를 위해서도 이 분할된 캐시들 사이의 조정을 해야만 합니다. Alpha 는 가장
+약한 메모리 순서 시맨틱 (semantic) 을 선택함으로써 메모리 배리어가 명시적으로
+사용되지 않았을 때에는 그런 조정이 필요하지 않게 했습니다.
+
+
+캐시 일관성 VS DMA
+------------------
+
+모든 시스템이 DMA 를 하는 디바이스에 대해서까지 캐시 일관성을 유지하지는
+않습니다. 그런 경우, DMA 를 시도하는 디바이스는 RAM 으로부터 잘못된 데이터를
+읽을 수 있는데, 더티 캐시 라인이 CPU 의 캐시에 머무르고 있고, 바뀐 값이 아직
+RAM 에 써지지 않았을 수 있기 때문입니다. 이 문제를 해결하기 위해선, 커널의
+적절한 부분에서 각 CPU 캐시의 문제되는 비트들을 플러시 (flush) 시켜야만 합니다
+(그리고 그것들을 무효화 - invalidation - 시킬 수도 있겠죠).
+
+또한, 디바이스에 의해 RAM 에 DMA 로 쓰여진 값은 디바이스가 쓰기를 완료한 후에
+CPU 의 캐시에서 RAM 으로 쓰여지는 더티 캐시 라인에 의해 덮어써질 수도 있고, CPU
+의 캐시에 존재하는 캐시 라인이 해당 캐시에서 삭제되고 다시 값을 읽어들이기
+전까지는 RAM 이 업데이트 되었다는 사실 자체가 숨겨져 버릴 수도 있습니다. 이
+문제를 해결하기 위해선, 커널의 적절한 부분에서 각 CPU 의 캐시 안의 문제가 되는
+비트들을 무효화 시켜야 합니다.
+
+캐시 관리에 대한 더 많은 정보를 위해선 Documentation/cachetlb.txt 를
+참고하세요.
+
+
+캐시 일관성 VS MMIO
+-------------------
+
+Memory mapped I/O 는 일반적으로 CPU 의 메모리 공간 내의 한 윈도우의 특정 부분
+내의 메모리 지역에 이루어지는데, 이 윈도우는 일반적인, RAM 으로 향하는
+윈도우와는 다른 특성을 갖습니다.
+
+그런 특성 가운데 하나는, 일반적으로 그런 액세스는 캐시를 완전히 우회하고
+디바이스 버스로 곧바로 향한다는 것입니다. 이 말은 MMIO 액세스는 먼저
+시작되어서 캐시에서 완료된 메모리 액세스를 추월할 수 있다는 뜻입니다. 이런
+경우엔 메모리 배리어만으로는 충분치 않고, 만약 캐시된 메모리 쓰기 오퍼레이션과
+MMIO 액세스가 어떤 방식으로든 의존적이라면 해당 캐시는 두 오퍼레이션 사이에
+비워져(flush)야만 합니다.
+
+
+======================
+CPU 들이 저지르는 일들
+======================
+
+프로그래머는 CPU 가 메모리 오퍼레이션들을 정확히 요청한대로 수행해 줄 것이라고
+생각하는데, 예를 들어 다음과 같은 코드를 CPU 에게 넘긴다면:
+
+ a = READ_ONCE(*A);
+ WRITE_ONCE(*B, b);
+ c = READ_ONCE(*C);
+ d = READ_ONCE(*D);
+ WRITE_ONCE(*E, e);
+
+CPU 는 다음 인스트럭션을 처리하기 전에 현재의 인스트럭션을 위한 메모리
+오퍼레이션을 완료할 것이라 생각하고, 따라서 시스템 외부에서 관찰하기에도 정해진
+순서대로 오퍼레이션이 수행될 것으로 예상합니다:
+
+ LOAD *A, STORE *B, LOAD *C, LOAD *D, STORE *E.
+
+
+당연하지만, 실제로는 훨씬 엉망입니다. 많은 CPU 와 컴파일러에서 앞의 가정은
+성립하지 못하는데 그 이유는 다음과 같습니다:
+
+ (*) 로드 오퍼레이션들은 실행을 계속 해나가기 위해 곧바로 완료될 필요가 있는
+ 경우가 많은 반면, 스토어 오퍼레이션들은 종종 별다른 문제 없이 유예될 수
+ 있습니다;
+
+ (*) 로드 오퍼레이션들은 예측적으로 수행될 수 있으며, 필요없는 로드였다고
+ 증명된 예측적 로드의 결과는 버려집니다;
+
+ (*) 로드 오퍼레이션들은 예측적으로 수행될 수 있으므로, 예상된 이벤트의
+ 시퀀스와 다른 시간에 로드가 이뤄질 수 있습니다;
+
+ (*) 메모리 액세스 순서는 CPU 버스와 캐시를 좀 더 잘 사용할 수 있도록 재배치
+ 될 수 있습니다;
+
+ (*) 로드와 스토어는 인접한 위치에의 액세스들을 일괄적으로 처리할 수 있는
+ 메모리나 I/O 하드웨어 (메모리와 PCI 디바이스 둘 다 이게 가능할 수
+ 있습니다) 에 대해 요청되는 경우, 개별 오퍼레이션을 위한 트랜잭션 설정
+ 비용을 아끼기 위해 조합되어 실행될 수 있습니다; 그리고
+
+ (*) 해당 CPU 의 데이터 캐시가 순서에 영향을 끼칠 수도 있고, 캐시 일관성
+ 메커니즘이 - 스토어가 실제로 캐시에 도달한다면 - 이 문제를 완화시킬 수는
+ 있지만 이 일관성 관리가 다른 CPU 들에도 같은 순서로 전달된다는 보장은
+ 없습니다.
+
+따라서, 앞의 코드에 대해 다른 CPU 가 보는 결과는 다음과 같을 수 있습니다:
+
+ LOAD *A, ..., LOAD {*C,*D}, STORE *E, STORE *B
+
+ ("LOAD {*C,*D}" 는 조합된 로드입니다)
+
+
+하지만, CPU 는 스스로는 일관적일 것을 보장합니다: CPU _자신_ 의 액세스들은
+자신에게는 메모리 배리어가 없음에도 불구하고 정확히 순서 세워진 것으로 보여질
+것입니다. 예를 들어 다음의 코드가 주어졌다면:
+
+ U = READ_ONCE(*A);
+ WRITE_ONCE(*A, V);
+ WRITE_ONCE(*A, W);
+ X = READ_ONCE(*A);
+ WRITE_ONCE(*A, Y);
+ Z = READ_ONCE(*A);
+
+그리고 외부의 영향에 의한 간섭이 없다고 가정하면, 최종 결과는 다음과 같이
+나타날 것이라고 예상될 수 있습니다:
+
+ U == *A 의 최초 값
+ X == W
+ Z == Y
+ *A == Y
+
+앞의 코드는 CPU 가 다음의 메모리 액세스 시퀀스를 만들도록 할겁니다:
+
+ U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
+
+하지만, 별다른 개입이 없고 프로그램의 시야에 이 세상이 여전히 일관적이라고
+보인다는 보장만 지켜진다면 이 시퀀스는 어떤 조합으로든 재구성될 수 있으며, 각
+액세스들은 합쳐지거나 버려질 수 있습니다. 일부 아키텍쳐에서 CPU 는 같은 위치에
+대한 연속적인 로드 오퍼레이션들을 재배치 할 수 있기 때문에 앞의 예에서의
+READ_ONCE() 와 WRITE_ONCE() 는 반드시 존재해야 함을 알아두세요. 그런 종류의
+아키텍쳐에서 READ_ONCE() 와 WRITE_ONCE() 는 이 문제를 막기 위해 필요한 일을
+뭐가 됐든지 하게 되는데, 예를 들어 Itanium 에서는 READ_ONCE() 와 WRITE_ONCE()
+가 사용하는 volatile 캐스팅은 GCC 가 그런 재배치를 방지하는 특수 인스트럭션인
+ld.acq 와 stl.rel 인스트럭션을 각각 만들어 내도록 합니다.
+
+컴파일러 역시 이 시퀀스의 액세스들을 CPU 가 보기도 전에 합치거나 버리거나 뒤로
+미뤄버릴 수 있습니다.
+
+예를 들어:
+
+ *A = V;
+ *A = W;
+
+는 다음과 같이 변형될 수 있습니다:
+
+ *A = W;
+
+따라서, 쓰기 배리어나 WRITE_ONCE() 가 없다면 *A 로의 V 값의 저장의 효과는
+사라진다고 가정될 수 있습니다. 비슷하게:
+
+ *A = Y;
+ Z = *A;
+
+는, 메모리 배리어나 READ_ONCE() 와 WRITE_ONCE() 없이는 다음과 같이 변형될 수
+있습니다:
+
+ *A = Y;
+ Z = Y;
+
+그리고 이 LOAD 오퍼레이션은 CPU 바깥에는 아예 보이지 않습니다.
+
+
+그리고, ALPHA 가 있다
+---------------------
+
+DEC Alpha CPU 는 가장 완화된 메모리 순서의 CPU 중 하나입니다. 뿐만 아니라,
+Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서, 의미적으로
+관계되어 있는 두개의 캐시 라인이 서로 다른 시간에 업데이트 되는게 가능합니다.
+이게 데이터 의존성 배리어가 정말 필요해지는 부분인데, 데이터 의존성 배리어는
+메모리 일관성 시스템과 함께 두개의 캐시를 동기화 시켜서, 포인터 변경과 새로운
+데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
+
+리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다.
+
+위의 "캐시 일관성" 서브섹션을 참고하세요.
+
+
+가상 머신 게스트
+----------------
+
+가상 머신에서 동작하는 게스트들은 게스트 자체는 SMP 지원 없이 컴파일 되었다
+해도 SMP 영향을 받을 수 있습니다. 이건 UP 커널을 사용하면서 SMP 호스트와
+결부되어 발생하는 부작용입니다. 이 경우에는 mandatory 배리어를 사용해서 문제를
+해결할 수 있겠지만 그런 해결은 대부분의 경우 최적의 해결책이 아닙니다.
+
+이 문제를 완벽하게 해결하기 위해, 로우 레벨의 virt_mb() 등의 매크로를 사용할 수
+있습니다. 이것들은 SMP 가 활성화 되어 있다면 smp_mb() 등과 동일한 효과를
+갖습니다만, SMP 와 SMP 아닌 시스템 모두에 대해 동일한 코드를 만들어냅니다.
+예를 들어, 가상 머신 게스트들은 (SMP 일 수 있는) 호스트와 동기화를 할 때에는
+smp_mb() 가 아니라 virt_mb() 를 사용해야 합니다.
+
+이것들은 smp_mb() 류의 것들과 모든 부분에서 동일하며, 특히, MMIO 의 영향에
+대해서는 간여하지 않습니다: MMIO 의 영향을 제어하려면, mandatory 배리어를
+사용하시기 바랍니다.
+
+
+=======
+사용 예
+=======
+
+순환식 버퍼
+-----------
+
+메모리 배리어는 순환식 버퍼를 생성자(producer)와 소비자(consumer) 사이의
+동기화에 락을 사용하지 않고 구현하는데에 사용될 수 있습니다. 더 자세한 내용을
+위해선 다음을 참고하세요:
+
+ Documentation/circular-buffers.txt
+
+
+=========
+참고 문헌
+=========
+
+Alpha AXP Architecture Reference Manual, Second Edition (Sites & Witek,
+Digital Press)
+ Chapter 5.2: Physical Address Space Characteristics
+ Chapter 5.4: Caches and Write Buffers
+ Chapter 5.5: Data Sharing
+ Chapter 5.6: Read/Write Ordering
+
+AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ Chapter 7.1: Memory-Access Ordering
+ Chapter 7.4: Buffering and Combining Memory Writes
+
+IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+System Programming Guide
+ Chapter 7.1: Locked Atomic Operations
+ Chapter 7.2: Memory Ordering
+ Chapter 7.4: Serializing Instructions
+
+The SPARC Architecture Manual, Version 9
+ Chapter 8: Memory Models
+ Appendix D: Formal Specification of the Memory Models
+ Appendix J: Programming with the Memory Models
+
+UltraSPARC Programmer Reference Manual
+ Chapter 5: Memory Accesses and Cacheability
+ Chapter 15: Sparc-V9 Memory Models
+
+UltraSPARC III Cu User's Manual
+ Chapter 9: Memory Models
+
+UltraSPARC IIIi Processor User's Manual
+ Chapter 8: Memory Models
+
+UltraSPARC Architecture 2005
+ Chapter 9: Memory
+ Appendix D: Formal Specifications of the Memory Models
+
+UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
+ Chapter 8: Memory Models
+ Appendix F: Caches and Cache Coherency
+
+Solaris Internals, Core Kernel Architecture, p63-68:
+ Chapter 3.3: Hardware Considerations for Locks and
+ Synchronization
+
+Unix Systems for Modern Architectures, Symmetric Multiprocessing and Caching
+for Kernel Programmers:
+ Chapter 13: Other Memory Models
+
+Intel Itanium Architecture Software Developer's Manual: Volume 1:
+ Section 2.6: Speculation
+ Section 4.4: Memory Access
diff --git a/Documentation/locking/lglock.txt b/Documentation/locking/lglock.txt
deleted file mode 100644
index a6971e3..0000000
--- a/Documentation/locking/lglock.txt
+++ /dev/null
@@ -1,166 +0,0 @@
-lglock - local/global locks for mostly local access patterns
-------------------------------------------------------------
-
-Origin: Nick Piggin's VFS scalability series introduced during
- 2.6.35++ [1] [2]
-Location: kernel/locking/lglock.c
- include/linux/lglock.h
-Users: currently only the VFS and stop_machine related code
-
-Design Goal:
-------------
-
-Improve scalability of globally used large data sets that are
-distributed over all CPUs as per_cpu elements.
-
-To manage global data structures that are partitioned over all CPUs
-as per_cpu elements but can be mostly handled by CPU local actions
-lglock will be used where the majority of accesses are cpu local
-reading and occasional cpu local writing with very infrequent
-global write access.
-
-
-* deal with things locally whenever possible
- - very fast access to the local per_cpu data
- - reasonably fast access to specific per_cpu data on a different
- CPU
-* while making global action possible when needed
- - by expensive access to all CPUs locks - effectively
- resulting in a globally visible critical section.
-
-Design:
--------
-
-Basically it is an array of per_cpu spinlocks with the
-lg_local_lock/unlock accessing the local CPUs lock object and the
-lg_local_lock_cpu/unlock_cpu accessing a remote CPUs lock object
-the lg_local_lock has to disable preemption as migration protection so
-that the reference to the local CPUs lock does not go out of scope.
-Due to the lg_local_lock/unlock only touching cpu-local resources it
-is fast. Taking the local lock on a different CPU will be more
-expensive but still relatively cheap.
-
-One can relax the migration constraints by acquiring the current
-CPUs lock with lg_local_lock_cpu, remember the cpu, and release that
-lock at the end of the critical section even if migrated. This should
-give most of the performance benefits without inhibiting migration
-though needs careful considerations for nesting of lglocks and
-consideration of deadlocks with lg_global_lock.
-
-The lg_global_lock/unlock locks all underlying spinlocks of all
-possible CPUs (including those off-line). The preemption disable/enable
-are needed in the non-RT kernels to prevent deadlocks like:
-
- on cpu 1
-
- task A task B
- lg_global_lock
- got cpu 0 lock
- <<<< preempt <<<<
- lg_local_lock_cpu for cpu 0
- spin on cpu 0 lock
-
-On -RT this deadlock scenario is resolved by the arch_spin_locks in the
-lglocks being replaced by rt_mutexes which resolve the above deadlock
-by boosting the lock-holder.
-
-
-Implementation:
----------------
-
-The initial lglock implementation from Nick Piggin used some complex
-macros to generate the lglock/brlock in lglock.h - they were later
-turned into a set of functions by Andi Kleen [7]. The change to functions
-was motivated by the presence of multiple lock users and also by them
-being easier to maintain than the generating macros. This change to
-functions is also the basis to eliminated the restriction of not
-being initializeable in kernel modules (the remaining problem is that
-locks are not explicitly initialized - see lockdep-design.txt)
-
-Declaration and initialization:
--------------------------------
-
- #include <linux/lglock.h>
-
- DEFINE_LGLOCK(name)
- or:
- DEFINE_STATIC_LGLOCK(name);
-
- lg_lock_init(&name, "lockdep_name_string");
-
- on UP this is mapped to DEFINE_SPINLOCK(name) in both cases, note
- also that as of 3.18-rc6 all declaration in use are of the _STATIC_
- variant (and it seems that the non-static was never in use).
- lg_lock_init is initializing the lockdep map only.
-
-Usage:
-------
-
-From the locking semantics it is a spinlock. It could be called a
-locality aware spinlock. lg_local_* behaves like a per_cpu
-spinlock and lg_global_* like a global spinlock.
-No surprises in the API.
-
- lg_local_lock(*lglock);
- access to protected per_cpu object on this CPU
- lg_local_unlock(*lglock);
-
- lg_local_lock_cpu(*lglock, cpu);
- access to protected per_cpu object on other CPU cpu
- lg_local_unlock_cpu(*lglock, cpu);
-
- lg_global_lock(*lglock);
- access all protected per_cpu objects on all CPUs
- lg_global_unlock(*lglock);
-
- There are no _trylock variants of the lglocks.
-
-Note that the lg_global_lock/unlock has to iterate over all possible
-CPUs rather than the actually present CPUs or a CPU could go off-line
-with a held lock [4] and that makes it very expensive. A discussion on
-these issues can be found at [5]
-
-Constraints:
-------------
-
- * currently the declaration of lglocks in kernel modules is not
- possible, though this should be doable with little change.
- * lglocks are not recursive.
- * suitable for code that can do most operations on the CPU local
- data and will very rarely need the global lock
- * lg_global_lock/unlock is *very* expensive and does not scale
- * on UP systems all lg_* primitives are simply spinlocks
- * in PREEMPT_RT the spinlock becomes an rt-mutex and can sleep but
- does not change the tasks state while sleeping [6].
- * in PREEMPT_RT the preempt_disable/enable in lg_local_lock/unlock
- is downgraded to a migrate_disable/enable, the other
- preempt_disable/enable are downgraded to barriers [6].
- The deadlock noted for non-RT above is resolved due to rt_mutexes
- boosting the lock-holder in this case which arch_spin_locks do
- not do.
-
-lglocks were designed for very specific problems in the VFS and probably
-only are the right answer in these corner cases. Any new user that looks
-at lglocks probably wants to look at the seqlock and RCU alternatives as
-her first choice. There are also efforts to resolve the RCU issues that
-currently prevent using RCU in place of view remaining lglocks.
-
-Note on brlock history:
------------------------
-
-The 'Big Reader' read-write spinlocks were originally introduced by
-Ingo Molnar in 2000 (2.4/2.5 kernel series) and removed in 2003. They
-later were introduced by the VFS scalability patch set in 2.6 series
-again as the "big reader lock" brlock [2] variant of lglock which has
-been replaced by seqlock primitives or by RCU based primitives in the
-3.13 kernel series as was suggested in [3] in 2003. The brlock was
-entirely removed in the 3.13 kernel series.
-
-Link: 1 http://lkml.org/lkml/2010/8/2/81
-Link: 2 http://lwn.net/Articles/401738/
-Link: 3 http://lkml.org/lkml/2003/3/9/205
-Link: 4 https://lkml.org/lkml/2011/8/24/185
-Link: 5 http://lkml.org/lkml/2011/12/18/189
-Link: 6 https://www.kernel.org/pub/linux/kernel/projects/rt/
- patch series - lglocks-rt.patch.patch
-Link: 7 http://lkml.org/lkml/2012/3/5/26
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
index 04ee900..201d483 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
@@ -144,7 +144,7 @@
- ``flags``
- - Flags. No flags are defined yet, so set this to 0.
+ - Flags. See :ref:`cec-log-addrs-flags` for a list of available flags.
- .. row 7
@@ -201,6 +201,25 @@
give the CEC framework more information about the device type, even
though the framework won't use it directly in the CEC message.
+.. _cec-log-addrs-flags:
+
+.. flat-table:: Flags for struct cec_log_addrs
+ :header-rows: 0
+ :stub-columns: 0
+ :widths: 3 1 4
+
+
+ - .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`:
+
+ - ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK``
+
+ - 1
+
+ - By default if no logical address of the requested type can be claimed, then
+ it will go back to the unconfigured state. If this flag is set, then it will
+ fallback to the Unregistered logical address. Note that if the Unregistered
+ logical address was explicitly requested, then this flag has no effect.
+
.. _cec-versions:
.. flat-table:: CEC Versions
diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
index 7a6d6d0..2e1e7392 100644
--- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
@@ -64,7 +64,8 @@
- ``phys_addr``
- - The current physical address.
+ - The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no
+ valid physical address is set.
- .. row 2
@@ -72,7 +73,10 @@
- ``log_addr_mask``
- - The current set of claimed logical addresses.
+ - The current set of claimed logical addresses. This is 0 if no logical
+ addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``.
+ If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device
+ has the unregistered logical address. In that case all other bits are 0.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a4d0a99..ba818ec 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -609,7 +609,7 @@
The data-dependency barrier must order the read into Q with the store
into *Q. This prohibits this outcome:
- (Q == B) && (B == 4)
+ (Q == &B) && (B == 4)
Please note that this pattern should be rare. After all, the whole point
of dependency ordering is to -prevent- writes to the data structure, along
@@ -1928,6 +1928,7 @@
See Documentation/DMA-API.txt for more information on consistent memory.
+
MMIO WRITE BARRIER
------------------
@@ -2075,7 +2076,7 @@
anything at all - especially with respect to I/O accesses - unless combined
with interrupt disabling operations.
-See also the section on "Inter-CPU locking barrier effects".
+See also the section on "Inter-CPU acquiring barrier effects".
As an example, consider the following:
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index 53a2fe1..8e37b0b 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -16,6 +16,7 @@
4.1 System-wide settings
4.2 Task interface
4.3 Default behavior
+ 4.4 Behavior of sched_yield()
5. Tasks CPU affinity
5.1 SCHED_DEADLINE and cpusets HOWTO
6. Future plans
@@ -426,6 +427,23 @@
Finally, notice that in order not to jeopardize the admission control a
-deadline task cannot fork.
+
+4.4 Behavior of sched_yield()
+-----------------------------
+
+ When a SCHED_DEADLINE task calls sched_yield(), it gives up its
+ remaining runtime and is immediately throttled, until the next
+ period, when its runtime will be replenished (a special flag
+ dl_yielded is set and used to handle correctly throttling and runtime
+ replenishment after a call to sched_yield()).
+
+ This behavior of sched_yield() allows the task to wake-up exactly at
+ the beginning of the next period. Also, this may be useful in the
+ future with bandwidth reclaiming mechanisms, where sched_yield() will
+ make the leftoever runtime available for reclamation by other
+ SCHED_DEADLINE tasks.
+
+
5. Tasks CPU affinity
=====================
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index 477927b..ea8d7b4 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -15,6 +15,8 @@
DEFINE_STATIC_KEY_TRUE(key);
DEFINE_STATIC_KEY_FALSE(key);
+DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
static_branch_likely()
static_branch_unlikely()
@@ -140,6 +142,13 @@
key is initialized false, a 'static_branch_inc()', will change the branch to
true. And then a 'static_branch_dec()', will again make the branch false.
+Where an array of keys is required, it can be defined as:
+
+ DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+
+or:
+
+ DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
4) Architecture level code patching interface, 'jump labels'
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dd5f916..a273dd0 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -203,6 +203,17 @@
Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
+HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+--------------------------------
+
+An arch may pass in a pointer to the return address on the stack. This
+prevents potential stack unwinding issues where the unwinder gets out of
+sync with ret_stack and the wrong addresses are reported by
+ftrace_graph_ret_addr().
+
+Adding support for it is easy: just define the macro in asm/ftrace.h and
+pass the return address pointer as the 'retp' argument to
+ftrace_push_return_trace().
HAVE_FTRACE_NMI_ENTER
---------------------
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ea52ec1..e4991fb 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -44,8 +44,8 @@
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
- (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
- are supported.
+ (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
+ (x8/x16/x32/x64), "string" and bitfield are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
@@ -54,7 +54,10 @@
-----
Several types are supported for fetch-args. Kprobe tracer will access memory
by given type. Prefix 's' and 'u' means those types are signed and unsigned
-respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+respectively. 'x' prefix implies it is unsigned. Traced arguments are shown
+in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
+or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and
+x86-64 uses x64).
String type is a special type, which fetches a "null-terminated" string from
kernel space. This means it will fail and store NULL if the string container
has been paged out.
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index 72d1cd4..94b6b45 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -40,8 +40,8 @@
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
- (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
- are supported.
+ (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
+ (x8/x16/x32/x64), "string" and bitfield are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
@@ -50,7 +50,10 @@
-----
Several types are supported for fetch-args. Uprobe tracer will access memory
by given type. Prefix 's' and 'u' means those types are signed and unsigned
-respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+respectively. 'x' prefix implies it is unsigned. Traced arguments are shown
+in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
+or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and
+x86-64 uses x64).
String type is a special type, which fetches a "null-terminated" string from
user space.
Bitfield is another special type, which takes 3 parameters, bit-width, bit-
diff --git a/Documentation/vme_api.txt b/Documentation/vme_api.txt
index ca5b827..9000655 100644
--- a/Documentation/vme_api.txt
+++ b/Documentation/vme_api.txt
@@ -8,13 +8,14 @@
with the VME subsystem, typically called from the devices init routine. This is
achieved via a call to the following function:
- int vme_register_driver (struct vme_driver *driver);
+ int vme_register_driver (struct vme_driver *driver, unsigned int ndevs);
If driver registration is successful this function returns zero, if an error
occurred a negative error code will be returned.
A pointer to a structure of type 'vme_driver' must be provided to the
-registration function. The structure is as follows:
+registration function. Along with ndevs, which is the number of devices your
+driver is able to support. The structure is as follows:
struct vme_driver {
struct list_head node;
@@ -32,8 +33,8 @@
should be correctly set. The '.name' element is a pointer to a string holding
the device driver's name.
-The '.match' function allows controlling the number of devices that need to
-be registered. The match function should return 1 if a device should be
+The '.match' function allows control over which VME devices should be registered
+with the driver. The match function should return 1 if a device should be
probed and 0 otherwise. This example match function (from vme_user.c) limits
the number of devices probed to one:
@@ -385,13 +386,13 @@
adjacent locations:
int vme_lm_attach(struct vme_resource *res, int num,
- void (*callback)(int));
+ void (*callback)(void *));
int vme_lm_detach(struct vme_resource *res, int num);
The callback function is declared as follows.
- void callback(int num);
+ void callback(void *data);
Slot Detection
diff --git a/MAINTAINERS b/MAINTAINERS
index 93c3213..d33c038 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -913,15 +913,17 @@
ARM PMU PROFILING AND DEBUGGING
M: Will Deacon <will.deacon@arm.com>
-R: Mark Rutland <mark.rutland@arm.com>
+M: Mark Rutland <mark.rutland@arm.com>
S: Maintained
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
F: arch/arm*/kernel/perf_*
F: arch/arm/oprofile/common.c
F: arch/arm*/kernel/hw_breakpoint.c
F: arch/arm*/include/asm/hw_breakpoint.h
F: arch/arm*/include/asm/perf_event.h
-F: drivers/perf/arm_pmu.c
+F: drivers/perf/*
F: include/linux/perf/arm_pmu.h
+F: Documentation/devicetree/bindings/arm/pmu.txt
ARM PORT
M: Russell King <linux@armlinux.org.uk>
@@ -1123,6 +1125,11 @@
F: Documentation/trace/coresight.txt
F: Documentation/devicetree/bindings/arm/coresight.txt
F: Documentation/ABI/testing/sysfs-bus-coresight-devices-*
+F: tools/perf/arch/arm/util/pmu.c
+F: tools/perf/arch/arm/util/auxtrace.c
+F: tools/perf/arch/arm/util/cs-etm.c
+F: tools/perf/arch/arm/util/cs-etm.h
+F: tools/perf/util/cs-etm.h
ARM/CORGI MACHINE SUPPORT
M: Richard Purdie <rpurdie@rpsys.net>
@@ -2114,11 +2121,6 @@
S: Maintained
F: drivers/mmc/host/atmel-mci.c
-ATMEL AT91 / AT32 SERIAL DRIVER
-M: Nicolas Ferre <nicolas.ferre@atmel.com>
-S: Supported
-F: drivers/tty/serial/atmel_serial.c
-
ATMEL AT91 SAMA5D2-Compatible Shutdown Controller
M: Nicolas Ferre <nicolas.ferre@atmel.com>
S: Supported
@@ -2501,7 +2503,7 @@
F: kernel/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
-M: Gary Zambrano <zambrano@broadcom.com>
+M: Michael Chan <michael.chan@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/b44.*
@@ -3282,6 +3284,7 @@
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
+F: Documentation/cpu-freq/
F: drivers/cpufreq/
F: include/linux/cpufreq.h
@@ -4584,6 +4587,13 @@
S: Maintained
F: drivers/video/fbdev/efifb.c
+EFI TEST DRIVER
+L: linux-efi@vger.kernel.org
+M: Ivan Hu <ivan.hu@canonical.com>
+M: Matt Fleming <matt@codeblueprint.co.uk>
+S: Maintained
+F: drivers/firmware/efi/test/
+
EFS FILESYSTEM
W: http://aeschi.ch.eu.org/efs/
S: Orphan
@@ -4851,6 +4861,7 @@
FIRMWARE LOADER (request_firmware)
M: Ming Lei <ming.lei@canonical.com>
+M: Luis R. Rodriguez <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/firmware_class/
@@ -6103,7 +6114,7 @@
F: drivers/cpufreq/intel_pstate.c
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M: Maik Broemme <mbroemme@plusserver.de>
+M: Maik Broemme <mbroemme@libmpq.org>
L: linux-fbdev@vger.kernel.org
S: Maintained
F: Documentation/fb/intelfb.txt
@@ -7443,9 +7454,8 @@
F: drivers/hwmon/max20751.c
MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
-M: "Hans J. Koch" <hjk@hansjkoch.de>
L: linux-hwmon@vger.kernel.org
-S: Maintained
+S: Orphan
F: Documentation/hwmon/max6650
F: drivers/hwmon/max6650.c
@@ -7668,6 +7678,12 @@
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlxsw/
+MELLANOX PLATFORM DRIVER
+M: Vadim Pasternak <vadimp@mellanox.com>
+L: platform-driver-x86@vger.kernel.org
+S: Supported
+F: arch/x86/platform/mellanox/mlx-platform.c
+
SOFT-ROCE DRIVER (rxe)
M: Moni Shoua <monis@mellanox.com>
L: linux-rdma@vger.kernel.org
@@ -7753,6 +7769,12 @@
S: Supported
F: arch/microblaze/
+MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER
+M: Richard Genoud <richard.genoud@gmail.com>
+S: Maintained
+F: drivers/tty/serial/atmel_serial.c
+F: include/linux/atmel_serial.h
+
MICROSOFT SURFACE PRO 3 BUTTON DRIVER
M: Chen Yu <yu.c.chen@intel.com>
L: platform-driver-x86@vger.kernel.org
@@ -8161,6 +8183,15 @@
W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
+NETWORKING [DSA]
+M: Andrew Lunn <andrew@lunn.ch>
+M: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+M: Florian Fainelli <f.fainelli@gmail.com>
+S: Maintained
+F: net/dsa/
+F: include/net/dsa.h
+F: drivers/net/dsa/
+
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
L: netdev@vger.kernel.org
@@ -8736,7 +8767,7 @@
F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M: Mark Fasheh <mfasheh@suse.com>
+M: Mark Fasheh <mfasheh@versity.com>
M: Joel Becker <jlbec@evilplan.org>
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
@@ -8848,6 +8879,7 @@
F: Documentation/virtual/paravirt_ops.txt
F: arch/*/kernel/paravirt*
F: arch/*/include/asm/paravirt.h
+F: include/linux/hypervisor.h
PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
M: Tim Waugh <tim@cyberelk.net>
@@ -11617,7 +11649,7 @@
THERMAL/CPU_COOLING
M: Amit Daniel Kachhap <amit.kachhap@gmail.com>
M: Viresh Kumar <viresh.kumar@linaro.org>
-M: Javi Merino <javi.merino@arm.com>
+M: Javi Merino <javi.merino@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/thermal/cpu-cooling-api.txt
@@ -12388,7 +12420,6 @@
F: fs/hppfs/
USERSPACE I/O (UIO)
-M: "Hans J. Koch" <hjk@hansjkoch.de>
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
@@ -12570,7 +12601,7 @@
F: net/8021q/
VLYNQ BUS
-M: Florian Fainelli <florian@openwrt.org>
+M: Florian Fainelli <f.fainelli@gmail.com>
L: openwrt-devel@lists.openwrt.org (subscribers-only)
S: Maintained
F: drivers/vlynq/vlynq.c
diff --git a/Makefile b/Makefile
index 74e22c2..80b8671 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 8
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Psychotic Stoned Sheep
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index fd6e971..180ea33 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -696,4 +696,38 @@
config CPU_NO_EFFICIENT_FFS
def_bool n
+config HAVE_ARCH_VMAP_STACK
+ def_bool n
+ help
+ An arch should select this symbol if it can support kernel stacks
+ in vmalloc space. This means:
+
+ - vmalloc space must be large enough to hold many kernel stacks.
+ This may rule out many 32-bit architectures.
+
+ - Stacks in vmalloc space need to work reliably. For example, if
+ vmap page tables are created on demand, either this mechanism
+ needs to work while the stack points to a virtual address with
+ unpopulated page tables or arch code (switch_to() and switch_mm(),
+ most likely) needs to ensure that the stack's page table entries
+ are populated before running on a possibly unpopulated stack.
+
+ - If the stack overflows into a guard page, something reasonable
+ should happen. The definition of "reasonable" is flexible, but
+ instantly rebooting without logging anything would be unfriendly.
+
+config VMAP_STACK
+ default y
+ bool "Use a virtually-mapped stack"
+ depends on HAVE_ARCH_VMAP_STACK && !KASAN
+ ---help---
+ Enable this if you want the use virtually-mapped kernel stacks
+ with guard pages. This causes kernel stack overflows to be
+ caught immediately rather than causing difficult-to-diagnose
+ corruption.
+
+ This is presently incompatible with KASAN because KASAN expects
+ the stack to map directly to the KASAN shadow map using a formula
+ that is incorrect if the stack is in vmalloc space.
+
source "kernel/gcov/Kconfig"
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 0d3e59f..ecd1237 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,7 +13,7 @@
select CLKSRC_OF
select CLONE_BACKWARDS
select COMMON_CLK
- select GENERIC_ATOMIC64
+ select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
@@ -353,8 +353,8 @@
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)"
- default "1" if !DISCONTIGMEM
- default "2" if DISCONTIGMEM
+ default "0" if !DISCONTIGMEM
+ default "1" if DISCONTIGMEM
depends on NEED_MULTIPLE_NODES
---help---
Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 601ed17..aa82d13 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -47,7 +47,6 @@
upto_gcc44 := $(call cc-ifversion, -le, 0404, y)
atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y)
-atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y)
cflags-$(atleast_gcc44) += -fsection-anchors
@@ -66,10 +65,8 @@
endif
-# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
-ifeq ($(atleast_gcc48),y)
-cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2
-endif
+cfi := $(call as-instr,.cfi_startproc\n.cfi_endproc,-DARC_DW2_UNWIND_AS_CFI)
+cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi)
ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
# Generic build system uses -O2, we want -O3
diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts
index 3dd6ed9..3acf04d 100644
--- a/arch/arc/boot/dts/abilis_tb100_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts
@@ -24,6 +24,7 @@
/include/ "abilis_tb100.dtsi"
/ {
+ model = "abilis,tb100";
chosen {
bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
};
diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts
index 1cf51c2..37d88c5 100644
--- a/arch/arc/boot/dts/abilis_tb101_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts
@@ -24,6 +24,7 @@
/include/ "abilis_tb101.dtsi"
/ {
+ model = "abilis,tb101";
chosen {
bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
};
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
index 3f9b058..d9b9b9d 100644
--- a/arch/arc/boot/dts/axs101.dts
+++ b/arch/arc/boot/dts/axs101.dts
@@ -13,6 +13,7 @@
/include/ "axs10x_mb.dtsi"
/ {
+ model = "snps,axs101";
compatible = "snps,axs101", "snps,arc-sdp";
chosen {
diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts
index e6d0e31..ec7fb27 100644
--- a/arch/arc/boot/dts/axs103.dts
+++ b/arch/arc/boot/dts/axs103.dts
@@ -16,6 +16,7 @@
/include/ "axs10x_mb.dtsi"
/ {
+ model = "snps,axs103";
compatible = "snps,axs103", "snps,arc-sdp";
chosen {
diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts
index f999fef..070c297 100644
--- a/arch/arc/boot/dts/axs103_idu.dts
+++ b/arch/arc/boot/dts/axs103_idu.dts
@@ -16,6 +16,7 @@
/include/ "axs10x_mb.dtsi"
/ {
+ model = "snps,axs103-smp";
compatible = "snps,axs103", "snps,arc-sdp";
chosen {
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index 6397051..ce0ccd20 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -10,6 +10,7 @@
/include/ "skeleton.dtsi"
/ {
+ model = "snps,nsim";
compatible = "snps,nsim";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index bf05fe5..3772c40 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -10,6 +10,7 @@
/include/ "skeleton_hs.dtsi"
/ {
+ model = "snps,nsim_hs";
compatible = "snps,nsim_hs";
#address-cells = <2>;
#size-cells = <2>;
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
index 99eabe1..48434d7c 100644
--- a/arch/arc/boot/dts/nsim_hs_idu.dts
+++ b/arch/arc/boot/dts/nsim_hs_idu.dts
@@ -10,6 +10,7 @@
/include/ "skeleton_hs_idu.dtsi"
/ {
+ model = "snps,nsim_hs-smp";
compatible = "snps,nsim_hs";
interrupt-parent = <&core_intc>;
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index e659a34..bcf6031 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -10,6 +10,7 @@
/include/ "skeleton.dtsi"
/ {
+ model = "snps,nsimosci";
compatible = "snps,nsimosci";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
index 16ce5d6..14a727c 100644
--- a/arch/arc/boot/dts/nsimosci_hs.dts
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@@ -10,6 +10,7 @@
/include/ "skeleton_hs.dtsi"
/ {
+ model = "snps,nsimosci_hs";
compatible = "snps,nsimosci_hs";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
index ce8dfbc..cbf65b6 100644
--- a/arch/arc/boot/dts/nsimosci_hs_idu.dts
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@@ -10,6 +10,7 @@
/include/ "skeleton_hs_idu.dtsi"
/ {
+ model = "snps,nsimosci_hs-smp";
compatible = "snps,nsimosci_hs";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts
index 5d803dd..3c51103 100644
--- a/arch/arc/boot/dts/vdk_hs38.dts
+++ b/arch/arc/boot/dts/vdk_hs38.dts
@@ -13,6 +13,7 @@
/include/ "vdk_axs10x_mb.dtsi"
/ {
+ model = "snps,vdk_archs";
compatible = "snps,axs103";
chosen {
diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts
index 2ba60c39..6be6800 100644
--- a/arch/arc/boot/dts/vdk_hs38_smp.dts
+++ b/arch/arc/boot/dts/vdk_hs38_smp.dts
@@ -13,6 +13,7 @@
/include/ "vdk_axs10x_mb.dtsi"
/ {
+ model = "snps,vdk_archs-smp";
compatible = "snps,axs103";
chosen {
diff --git a/arch/arc/boot/dts/zebu_hs.dts b/arch/arc/boot/dts/zebu_hs.dts
new file mode 100644
index 0000000..1c1324e
--- /dev/null
+++ b/arch/arc/boot/dts/zebu_hs.dts
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton_hs.dtsi"
+
+/ {
+ model = "snps,zebu_hs";
+ compatible = "snps,zebu_hs";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ memory {
+ device_type = "memory";
+ reg = <0x80000000 0x20000000>; /* 512 */
+ };
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_clk: core_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ };
+
+ core_intc: interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ uart0: serial@f0000000 {
+ compatible = "ns8250";
+ reg = <0xf0000000 0x2000>;
+ interrupts = <24>;
+ clock-frequency = <50000000>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/zebu_hs_idu.dts b/arch/arc/boot/dts/zebu_hs_idu.dts
new file mode 100644
index 0000000..65204b4
--- /dev/null
+++ b/arch/arc/boot/dts/zebu_hs_idu.dts
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton_hs_idu.dtsi"
+
+/ {
+ model = "snps,zebu_hs-smp";
+ compatible = "snps,zebu_hs";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ memory {
+ device_type = "memory";
+ reg = <0x80000000 0x20000000>; /* 512 */
+ };
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_clk: core_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>; /* 50 MHZ */
+ };
+
+ core_intc: interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+/* interrupts = <16 17 18 19 20 21 22 23 24 25>; */
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+ /* <hwirq distribution>
+ distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 */
+ #interrupt-cells = <2>;
+ interrupts = <24 25 26 27 28 29 30 31>;
+
+ };
+
+ uart0: serial@f0000000 {
+ /* compatible = "ns8250"; Doesn't use FIFOs */
+ compatible = "ns16550a";
+ reg = <0xf0000000 0x2000>;
+ interrupt-parent = <&idu_intc>;
+ /* interrupts = <0 1>; DEST=1*/
+ /* interrupts = <0 2>; DEST=2*/
+ interrupts = <0 0>; /* RR*/
+ clock-frequency = <50000000>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 6cdffea..0a0eaf0 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -18,6 +18,9 @@
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_ARC_PLAT_AXS10X=y
CONFIG_AXS101=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 491b3b5..2233f57 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -18,6 +18,9 @@
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_ARC_PLAT_AXS10X=y
CONFIG_AXS103=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index b25ee73..1108747 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -18,6 +18,9 @@
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_ARC_PLAT_AXS10X=y
CONFIG_AXS103=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index a99dc7a..65ab9fb 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -11,13 +11,16 @@
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 59f221f..3b3990c 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -16,6 +16,9 @@
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
diff --git a/arch/arc/configs/zebu_hs_defconfig b/arch/arc/configs/zebu_hs_defconfig
new file mode 100644
index 0000000..9f6166b
--- /dev/null
+++ b/arch/arc/configs/zebu_hs_defconfig
@@ -0,0 +1,86 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EXPERT=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_MEMORY_INIT=y
+# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/zebu_hs_smp_defconfig b/arch/arc/configs/zebu_hs_smp_defconfig
new file mode 100644
index 0000000..44e9693
--- /dev/null
+++ b/arch/arc/configs/zebu_hs_smp_defconfig
@@ -0,0 +1,89 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_LOCKUP_DETECTOR=y
+# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 7fbaea0..db25c65 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -95,7 +95,7 @@
/* Auxiliary registers */
#define AUX_IDENTITY 4
#define AUX_INTR_VEC_BASE 0x25
-#define AUX_NON_VOL 0x5e
+#define AUX_VOL 0x5e
/*
* Floating Pt Registers
@@ -240,14 +240,6 @@
#endif
};
-struct bcr_perip {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int start:8, pad2:8, sz:8, ver:8;
-#else
- unsigned int ver:8, sz:8, pad2:8, start:8;
-#endif
-};
-
struct bcr_iccm_arcompact {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int base:16, pad:5, sz:3, ver:8;
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4e3c1b6..b65930a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -20,6 +20,7 @@
#ifndef CONFIG_ARC_PLAT_EZNPS
#define atomic_read(v) READ_ONCE((v)->counter)
+#define ATOMIC_INIT(i) { (i) }
#ifdef CONFIG_ARC_HAS_LLSC
@@ -284,6 +285,7 @@
ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
#define atomic_sub(i, v) atomic_add(-(i), (v))
#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
+#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
@@ -292,6 +294,7 @@
ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
+#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v))
ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
@@ -343,10 +346,266 @@
#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
-#define ATOMIC_INIT(i) { (i) }
+
+#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
-#endif
+#else /* Kconfig ensures this is only enabled with needed h/w assist */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ * - The address HAS to be 64-bit aligned
+ * - There are 2 semantics involved here:
+ * = exclusive implies no interim update between load/store to same addr
+ * = both words are observed/updated together: this is guaranteed even
+ * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
+ * is NOT required to use LLOCKD+SCONDD, STD suffices
+ */
+
+typedef struct {
+ aligned_u64 counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long long val;
+
+ __asm__ __volatile__(
+ " ldd %0, [%1] \n"
+ : "=r"(val)
+ : "r"(&v->counter));
+
+ return val;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long a)
+{
+ /*
+ * This could have been a simple assignment in "C" but would need
+ * explicit volatile. Otherwise gcc optimizers could elide the store
+ * which borked atomic64 self-test
+ * In the inline asm version, memory clobber needed for exact same
+ * reason, to tell gcc about the store.
+ *
+ * This however is not needed for sibling atomic64_add() etc since both
+ * load/store are explicitly done in inline asm. As long as API is used
+ * for each access, gcc has no way to optimize away any load/store
+ */
+ __asm__ __volatile__(
+ " std %0, [%1] \n"
+ :
+ : "r"(a), "r"(&v->counter)
+ : "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2) \
+static inline void atomic64_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long long val; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); \
+} \
+
+#define ATOMIC64_OP_RETURN(op, op1, op2) \
+static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
+{ \
+ unsigned long long val; \
+ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); /* memory clobber comes from smp_mb() */ \
+ \
+ smp_mb(); \
+ \
+ return val; \
+}
+
+#define ATOMIC64_FETCH_OP(op, op1, op2) \
+static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long long val, orig; \
+ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%2] \n" \
+ " " #op1 " %L1, %L0, %L3 \n" \
+ " " #op2 " %H1, %H0, %H3 \n" \
+ " scondd %1, [%2] \n" \
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); /* memory clobber comes from smp_mb() */ \
+ \
+ smp_mb(); \
+ \
+ return orig; \
+}
+
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_OP_RETURN(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
+
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long long
+atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+{
+ long long prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " brne %L0, %L2, 2f \n"
+ " brne %H0, %H2, 2f \n"
+ " scondd %3, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "ir"(expected), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+
+static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+{
+ long long prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " scondd %2, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+
+/**
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ long long val;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
+ " sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
+ " brlt %H0, 0, 2f \n"
+ " scondd %0, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(val)
+ : "r"(&v->counter)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return val;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * if (v != u) { v += a; ret = 1} else {ret = 0}
+ * Returns 1 iff @v was not @u (i.e. if add actually happened)
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+ long long val;
+ int op_done;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%2] \n"
+ " mov %1, 1 \n"
+ " brne %L0, %L4, 2f # continue to add since v != u \n"
+ " breq.d %H0, %H4, 3f # return since v == u \n"
+ " mov %1, 0 \n"
+ "2: \n"
+ " add.f %L0, %L0, %L3 \n"
+ " adc %H0, %H0, %H3 \n"
+ " scondd %0, [%2] \n"
+ " bnz 1b \n"
+ "3: \n"
+ : "=&r"(val), "=&r" (op_done)
+ : "r"(&v->counter), "r"(a), "r"(u)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return op_done;
+}
+
+#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v) atomic64_add(1LL, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v) atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
+
+#endif /* !CONFIG_GENERIC_ATOMIC64 */
+
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 23706c6..fb781e3 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -54,7 +54,7 @@
extern void read_decode_cache_bcr(void);
extern int ioc_exists;
-extern unsigned long perip_base;
+extern unsigned long perip_base, perip_end;
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
new file mode 100644
index 0000000..bb7bdbc
--- /dev/null
+++ b/arch/arc/include/asm/dwarf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016-17 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ARC_DWARF_H
+#define _ASM_ARC_DWARF_H
+
+#ifdef __ASSEMBLY__
+
+#ifdef ARC_DW2_UNWIND_AS_CFI
+
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_REGISTER .cfi_register
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_UNDEFINED .cfi_undefined
+
+#else
+
+#define CFI_IGNORE #
+
+#define CFI_STARTPROC CFI_IGNORE
+#define CFI_ENDPROC CFI_IGNORE
+#define CFI_DEF_CFA CFI_IGNORE
+#define CFI_REGISTER CFI_IGNORE
+#define CFI_REL_OFFSET CFI_IGNORE
+#define CFI_UNDEFINED CFI_IGNORE
+
+#endif /* !ARC_DW2_UNWIND_AS_CFI */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ARC_DWARF_H */
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index 51a99e2..7096f97 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -23,8 +23,7 @@
/* ARC Relocations (kernel Modules only) */
#define R_ARC_32 0x4
#define R_ARC_32_ME 0x1B
-#define R_ARC_S25H_PCREL 0x10
-#define R_ARC_S25W_PCREL 0x11
+#define R_ARC_32_PCREL 0x31
/*to set parameters in the core dumps */
#define ELF_ARCH EM_ARCOMPACT
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index d1ec7f6..e880dfa 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -112,7 +112,7 @@
*/
temp = (1 << 5) |
((!!(temp & STATUS_IE_MASK)) << CLRI_STATUS_IE_BIT) |
- (temp & CLRI_STATUS_E_MASK);
+ ((temp >> 1) & CLRI_STATUS_E_MASK);
return temp;
}
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 5faad17..b29f1a9 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -9,6 +9,8 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
+#include <asm/dwarf.h>
+
#ifdef __ASSEMBLY__
#define ASM_NL ` /* use '`' to mark new line in macro */
@@ -32,6 +34,16 @@
#endif
.endm
+#define ENTRY_CFI(name) \
+ .globl name ASM_NL \
+ ALIGN ASM_NL \
+ name: ASM_NL \
+ CFI_STARTPROC ASM_NL
+
+#define END_CFI(name) \
+ CFI_ENDPROC ASM_NL \
+ .size name, .-name
+
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_ARC_HAS_ICCM
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 5f07176..9185541 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -118,6 +118,9 @@
[PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
[PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
[PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
+
+ [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
+ [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
};
#define C(_x) PERF_COUNT_HW_CACHE_##_x
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index e6890b1..7c1f365 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -23,6 +23,7 @@
.global __switch_to
.type __switch_to, @function
__switch_to:
+ CFI_STARTPROC
/* Save regs on kernel mode stack of task */
st.a blink, [sp, -4]
@@ -59,4 +60,4 @@
ld.ab blink, [sp, 4]
j [blink]
-END(__switch_to)
+END_CFI(__switch_to)
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 2efb0625..1eea99b 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -35,7 +35,7 @@
btst r10, TIF_SYSCALL_TRACE
bnz tracesys_exit
- b ret_from_system_call
+ b .Lret_from_system_call
END(sys_clone_wrapper)
ENTRY(ret_from_fork)
@@ -61,18 +61,6 @@
b ret_from_exception
END(ret_from_fork)
-#ifdef CONFIG_ARC_DW2_UNWIND
-; Workaround for bug 94179 (STAR ):
-; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
-; section (.debug_frame) as loadable. So we force it here.
-; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
-; would not work after a clean build due to kernel build system dependencies.
-.section .debug_frame, "wa",@progbits
-
-; Reset to .text as this file is included in entry-<isa>.S
-.section .text, "ax",@progbits
-#endif
-
;################### Non TLB Exception Handling #############################
; ---------------------------------------------
@@ -260,20 +248,18 @@
; syscall num shd not exceed the total system calls avail
cmp r8, NR_syscalls
mov.hi r0, -ENOSYS
- bhi ret_from_system_call
+ bhi .Lret_from_system_call
; Offset into the syscall_table and call handler
ld.as r9,[sys_call_table, r8]
jl [r9] ; Entry into Sys Call Handler
- ; fall through to ret_from_system_call
-END(EV_Trap)
-
-ENTRY(ret_from_system_call)
+.Lret_from_system_call:
st r0, [sp, PT_r0] ; sys call return value in pt_regs
- ; fall through yet again to ret_from_exception
+ ; fall through to ret_from_exception
+END(EV_Trap)
;############# Return from Intr/Excp/Trap (Linux Specifics) ##############
;
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 6c24faf..62b59409 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -74,7 +74,7 @@
tmp = read_aux_reg(0xa);
tmp |= STATUS_AD_MASK | (irq_prio << 1);
tmp &= ~STATUS_IE_MASK;
- asm volatile("flag %0 \n"::"r"(tmp));
+ asm volatile("kflag %0 \n"::"r"(tmp));
}
static void arcv2_irq_mask(struct irq_data *data)
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
index 376e046..9a28497 100644
--- a/arch/arc/kernel/module.c
+++ b/arch/arc/kernel/module.c
@@ -22,13 +22,9 @@
*(addr + 1) = (value & 0xffff);
}
-/* ARC specific section quirks - before relocation loop in generic loader
- *
- * For dwarf unwinding out of modules, this needs to
- * 1. Ensure the .debug_frame is allocatable (ARC Linker bug: despite
- * -fasynchronous-unwind-tables it doesn't).
- * 2. Since we are iterating thru sec hdr tbl anyways, make a note of
- * the exact section index, for later use.
+/*
+ * This gets called before relocation loop in generic loader
+ * Make a note of the section index of unwinding section
*/
int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
char *secstr, struct module *mod)
@@ -40,8 +36,7 @@
mod->arch.unw_info = NULL;
for (i = 1; i < hdr->e_shnum; i++) {
- if (strcmp(secstr+sechdrs[i].sh_name, ".debug_frame") == 0) {
- sechdrs[i].sh_flags |= SHF_ALLOC;
+ if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) {
mod->arch.unw_sec_idx = i;
break;
}
@@ -106,10 +101,12 @@
*/
relo_type = ELF32_R_TYPE(rel_entry[i].r_info);
- if (likely(R_ARC_32_ME == relo_type))
+ if (likely(R_ARC_32_ME == relo_type)) /* ME ( S + A ) */
arc_write_me((unsigned short *)location, relocation);
- else if (R_ARC_32 == relo_type)
+ else if (R_ARC_32 == relo_type) /* ( S + A ) */
*((Elf32_Addr *) location) = relocation;
+ else if (R_ARC_32_PCREL == relo_type) /* ( S + A ) - PDATA ) */
+ *((Elf32_Addr *) location) = relocation - location;
else
goto relo_err;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 08f03d9..2ce24e7 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -179,8 +179,8 @@
if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
return -ENOENT;
hwc->config |= arc_pmu->ev_hw_idx[event->attr.config];
- pr_debug("init event %d with h/w %d \'%s\'\n",
- (int) event->attr.config, (int) hwc->config,
+ pr_debug("init event %d with h/w %08x \'%s\'\n",
+ (int)event->attr.config, (int)hwc->config,
arc_pmu_ev_hw_map[event->attr.config]);
return 0;
@@ -189,6 +189,8 @@
if (ret < 0)
return ret;
hwc->config |= arc_pmu->ev_hw_idx[ret];
+ pr_debug("init cache event with h/w %08x \'%s\'\n",
+ (int)hwc->config, arc_pmu_ev_hw_map[ret]);
return 0;
default:
return -ENOENT;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index f52a0d0d..3df7f9c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -171,6 +171,7 @@
#else
{ {0x50, "ARC HS38 R2.0"}, 0x51},
{ {0x52, "ARC HS38 R2.1"}, 0x52},
+ { {0x53, "ARC HS38 R3.0"}, 0x53},
#endif
{ {0x00, NULL } }
};
@@ -272,8 +273,8 @@
FIX_PTR(cpu);
n += scnprintf(buf + n, len - n,
- "Vector Table\t: %#x\nUncached Base\t: %#lx\n",
- cpu->vec_base, perip_base);
+ "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n",
+ cpu->vec_base, perip_base, perip_end);
if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 0587bf1..61fd1ce 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -111,6 +111,8 @@
#define DW_EH_PE_indirect 0x80
#define DW_EH_PE_omit 0xff
+#define CIE_ID 0
+
typedef unsigned long uleb128_t;
typedef signed long sleb128_t;
@@ -232,6 +234,7 @@
static const u32 bad_cie, not_fde;
static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static const u32 *__cie_for_fde(const u32 *fde);
static signed fde_pointer_type(const u32 *cie);
struct eh_frame_hdr_table_entry {
@@ -338,10 +341,9 @@
for (fde = table->address, tableSize = table->size, n = 0;
tableSize;
tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
- /* const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); */
- const u32 *cie = (const u32 *)(fde[1]);
+ const u32 *cie = __cie_for_fde(fde);
- if (fde[1] == 0xffffffff)
+ if (fde[1] == CIE_ID)
continue; /* this is a CIE */
ptr = (const u8 *)(fde + 2);
header->table[n].start = read_pointer(&ptr,
@@ -504,6 +506,15 @@
return value;
}
+static const u32 *__cie_for_fde(const u32 *fde)
+{
+ const u32 *cie;
+
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+
+ return cie;
+}
+
static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
{
const u32 *cie;
@@ -511,19 +522,18 @@
if (!*fde || (*fde & (sizeof(*fde) - 1)))
return &bad_cie;
- if (fde[1] == 0xffffffff)
+ if (fde[1] == CIE_ID)
return ¬_fde; /* this is a CIE */
if ((fde[1] & (sizeof(*fde) - 1)))
/* || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) */
return NULL; /* this is not a valid FDE */
- /* cie = fde + 1 - fde[1] / sizeof(*fde); */
- cie = (u32 *) fde[1];
+ cie = __cie_for_fde(fde);
if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde)
|| (*cie & (sizeof(*cie) - 1))
- || (cie[1] != 0xffffffff))
+ || (cie[1] != CIE_ID))
return NULL; /* this is not a (valid) CIE */
return cie;
}
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 894e696..3661107 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -82,14 +82,6 @@
PERCPU_SECTION(L1_CACHE_BYTES)
- /*
- * .exit.text is discard at runtime, not link time, to deal with
- * references from .debug_frame
- * It will be init freed, being inside [__init_start : __init_end]
- */
- .exit.text : { EXIT_TEXT }
- .exit.data : { EXIT_DATA }
-
. = ALIGN(PAGE_SIZE);
__init_end = .;
@@ -120,18 +112,13 @@
#ifdef CONFIG_ARC_DW2_UNWIND
. = ALIGN(PAGE_SIZE);
- .debug_frame : {
+ .eh_frame : {
__start_unwind = .;
- *(.debug_frame)
+ *(.eh_frame)
__end_unwind = .;
}
- /*
- * gcc 4.8 generates this for -fasynchonous-unwind-tables,
- * while we still use the .debug_frame based unwinder
- */
- /DISCARD/ : { *(.eh_frame) }
#else
- /DISCARD/ : { *(.debug_frame) }
+ /DISCARD/ : { *(.eh_frame) }
#endif
NOTES
@@ -148,7 +135,7 @@
}
#ifndef CONFIG_DEBUG_INFO
- /* open-coded because we need .debug_frame seperately for unwinding */
+ /DISCARD/ : { *(.debug_frame) }
/DISCARD/ : { *(.debug_aranges) }
/DISCARD/ : { *(.debug_pubnames) }
/DISCARD/ : { *(.debug_info) }
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
index a4015e7..21a1030 100644
--- a/arch/arc/lib/memcmp.S
+++ b/arch/arc/lib/memcmp.S
@@ -16,7 +16,7 @@
#define SHIFT r2
#endif
-ENTRY(memcmp)
+ENTRY_CFI(memcmp)
or r12,r0,r1
asl_s r12,r12,30
sub r3,r2,1
@@ -149,4 +149,4 @@
.Lnil:
j_s.d [blink]
mov r0,0
-END(memcmp)
+END_CFI(memcmp)
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
index 3222573..ba0becc 100644
--- a/arch/arc/lib/memcpy-700.S
+++ b/arch/arc/lib/memcpy-700.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
-ENTRY(memcpy)
+ENTRY_CFI(memcpy)
or r3,r0,r1
asl_s r3,r3,30
mov_s r5,r0
@@ -63,4 +63,4 @@
.Lendbloop:
j_s.d [blink]
stb r12,[r5,0]
-END(memcpy)
+END_CFI(memcpy)
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
index f96c75e..d61044d 100644
--- a/arch/arc/lib/memcpy-archs.S
+++ b/arch/arc/lib/memcpy-archs.S
@@ -40,7 +40,7 @@
# define ZOLAND 0xF
#endif
-ENTRY(memcpy)
+ENTRY_CFI(memcpy)
prefetch [r1] ; Prefetch the read location
prefetchw [r0] ; Prefetch the write location
mov.f 0, r2
@@ -233,4 +233,4 @@
.Lcopybytewise_3:
j [blink]
-END(memcpy)
+END_CFI(memcpy)
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index 365b183..62ad4bc 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -10,7 +10,7 @@
#undef PREALLOC_NOT_AVAIL
-ENTRY(memset)
+ENTRY_CFI(memset)
prefetchw [r0] ; Prefetch the write location
mov.f 0, r2
;;; if size is zero
@@ -112,11 +112,11 @@
j [blink]
-END(memset)
+END_CFI(memset)
-ENTRY(memzero)
+ENTRY_CFI(memzero)
; adjust bzero args to memset args
mov r2, r1
b.d memset ;tail call so need to tinker with blink
mov r1, 0
-END(memzero)
+END_CFI(memzero)
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
index d36bd43..cf736f9 100644
--- a/arch/arc/lib/memset.S
+++ b/arch/arc/lib/memset.S
@@ -10,7 +10,7 @@
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
-ENTRY(memset)
+ENTRY_CFI(memset)
mov_s r4,r0
or r12,r0,r2
bmsk.f r12,r12,1
@@ -46,14 +46,14 @@
stb.ab r1,[r4,1]
.Ltiny_end:
j_s [blink]
-END(memset)
+END_CFI(memset)
; memzero: @r0 = mem, @r1 = size_t
; memset: @r0 = mem, @r1 = char, @r2 = size_t
-ENTRY(memzero)
+ENTRY_CFI(memzero)
; adjust bzero args to memset args
mov r2, r1
mov r1, 0
b memset ;tail call so need to tinker with blink
-END(memzero)
+END_CFI(memzero)
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
index b725d58..2d300da 100644
--- a/arch/arc/lib/strchr-700.S
+++ b/arch/arc/lib/strchr-700.S
@@ -13,7 +13,7 @@
#include <linux/linkage.h>
-ENTRY(strchr)
+ENTRY_CFI(strchr)
extb_s r1,r1
asl r5,r1,8
bmsk r2,r0,1
@@ -130,4 +130,4 @@
j_s.d [blink]
mov.mi r0,0
#endif /* ENDIAN */
-END(strchr)
+END_CFI(strchr)
diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S
index 4f338ee..fae9e82 100644
--- a/arch/arc/lib/strcmp-archs.S
+++ b/arch/arc/lib/strcmp-archs.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
-ENTRY(strcmp)
+ENTRY_CFI(strcmp)
or r2, r0, r1
bmsk_s r2, r2, 1
brne r2, 0, @.Lcharloop
@@ -75,4 +75,4 @@
.Lcmpend:
j_s.d [blink]
sub r0, r2, r3
-END(strcmp)
+END_CFI(strcmp)
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
index 3544600..fb20096 100644
--- a/arch/arc/lib/strcmp.S
+++ b/arch/arc/lib/strcmp.S
@@ -15,7 +15,7 @@
#include <linux/linkage.h>
-ENTRY(strcmp)
+ENTRY_CFI(strcmp)
or r2,r0,r1
bmsk_s r2,r2,1
brne r2,0,.Lcharloop
@@ -93,4 +93,4 @@
.Lcmpend:
j_s.d [blink]
sub r0,r2,r3
-END(strcmp)
+END_CFI(strcmp)
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
index 8422f38..6a6c155 100644
--- a/arch/arc/lib/strcpy-700.S
+++ b/arch/arc/lib/strcpy-700.S
@@ -18,7 +18,7 @@
#include <linux/linkage.h>
-ENTRY(strcpy)
+ENTRY_CFI(strcpy)
or r2,r0,r1
bmsk_s r2,r2,1
brne.d r2,0,charloop
@@ -67,4 +67,4 @@
brne.d r3,0,charloop
stb.ab r3,[r10,1]
j [blink]
-END(strcpy)
+END_CFI(strcpy)
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
index 53cfd56..839b44b 100644
--- a/arch/arc/lib/strlen.S
+++ b/arch/arc/lib/strlen.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
-ENTRY(strlen)
+ENTRY_CFI(strlen)
or r3,r0,7
ld r2,[r3,-7]
ld.a r6,[r3,-3]
@@ -80,4 +80,4 @@
.Learly_end:
b.d .Lend
sub_s.ne r1,r1,r1
-END(strlen)
+END_CFI(strlen)
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 0b10efe..97dddbe 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -25,6 +25,7 @@
int ioc_exists;
volatile int slc_enable = 1, ioc_enable = 1;
unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
+unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int cacheop);
@@ -76,7 +77,6 @@
static void read_decode_cache_bcr_arcv2(int cpu)
{
struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
- struct bcr_generic uncached_space;
struct bcr_generic sbcr;
struct bcr_slc_cfg {
@@ -95,6 +95,15 @@
#endif
} cbcr;
+ struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+ unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+ } vol;
+
+
READ_BCR(ARC_REG_SLC_BCR, sbcr);
if (sbcr.ver) {
READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
@@ -107,10 +116,14 @@
if (cbcr.c && ioc_enable)
ioc_exists = 1;
- /* Legacy Data Uncached BCR is deprecated from v3 onwards */
- READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
- if (uncached_space.ver > 2)
- perip_base = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
+ /* HS 2.0 didn't have AUX_VOL */
+ if (cpuinfo_arc700[cpu].core.family > 0x51) {
+ READ_BCR(AUX_VOL, vol);
+ perip_base = vol.start << 28;
+ /* HS 3.0 has limit and strict-ordering fields */
+ if (cpuinfo_arc700[cpu].core.family > 0x52)
+ perip_end = (vol.limit << 28) - 1;
+ }
}
void read_decode_cache_bcr(void)
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index f52b7db6..9881bd7 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -19,7 +19,7 @@
if (is_isa_arcompact()) {
if (paddr >= ARC_UNCACHED_ADDR_SPACE)
return true;
- } else if (paddr >= perip_base && paddr <= 0xFFFFFFFF) {
+ } else if (paddr >= perip_base && paddr <= perip_end) {
return true;
}
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
index e4fe514..aea8738 100644
--- a/arch/arc/plat-sim/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -24,6 +24,7 @@
"snps,nsim_hs",
"snps,nsimosci",
"snps,nsimosci_hs",
+ "snps,zebu_hs",
NULL,
};
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a9c4e48..3cd9042 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1,6 +1,7 @@
config ARM
bool
default y
+ select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
@@ -878,6 +879,7 @@
select CLKSRC_STM32
select PINCTRL
select RESET_CONTROLLER
+ select STM32_EXTI
help
Support for STMicroelectronics STM32 processors.
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index af11c2f..fc6d541 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -779,7 +779,7 @@
orrne r0, r0, #1 @ MMU enabled
movne r1, #0xfffffffd @ domain 0 = client
bic r6, r6, #1 << 31 @ 32-bit translation system
- bic r6, r6, #3 << 0 @ use only ttbr0
+ bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0
mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index 35df462..1a189d4 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -176,6 +176,14 @@
reg = <0x40013800 0x400>;
};
+ exti: interrupt-controller@40013c00 {
+ compatible = "st,stm32-exti";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x40013C00 0x400>;
+ interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
+ };
+
pin-controller {
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 3f47f12..6053f64 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -56,16 +56,4 @@
"b.L_switcher",
&bL_switcher_fops
};
-
-static int __init bL_switcher_dummy_if_init(void)
-{
- return misc_register(&bL_switcher_device);
-}
-
-static void __exit bL_switcher_dummy_if_exit(void)
-{
- misc_deregister(&bL_switcher_device);
-}
-
-module_init(bL_switcher_dummy_if_init);
-module_exit(bL_switcher_dummy_if_exit);
+module_misc_device(bL_switcher_device);
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 01986de..36cc7cc 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -28,7 +28,7 @@
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_CPUFREQ_DT=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_EXYNOS_CPUIDLE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ea3566f..5845910 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -135,7 +135,7 @@
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_QORIQ_CPUFREQ=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_CPUIDLE=y
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index da3c042..aef022a 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -284,7 +284,7 @@
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
- if (nbytes) {
+ if (walk.nbytes % AES_BLOCK_SIZE) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index e08d151..dfe4002 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -34,6 +34,7 @@
#define ICC_CTLR __ACCESS_CP15(c12, 0, c12, 4)
#define ICC_SRE __ACCESS_CP15(c12, 0, c12, 5)
#define ICC_IGRPEN1 __ACCESS_CP15(c12, 0, c12, 7)
+#define ICC_BPR1 __ACCESS_CP15(c12, 0, c12, 3)
#define ICC_HSRE __ACCESS_CP15(c12, 4, c9, 5)
@@ -157,6 +158,11 @@
isb();
}
+static inline void gic_write_bpr1(u32 val)
+{
+ asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val));
+}
+
/*
* Even in 32bit systems that use LPAE, there is no guarantee that the I/O
* interface provides true 64bit atomic accesses, so using strd/ldrd doesn't
diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h
new file mode 100644
index 0000000..0b350a7
--- /dev/null
+++ b/arch/arm/include/asm/clocksource.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+struct arch_clocksource_data {
+ bool vdso_direct; /* Usable for direct VDSO access? */
+};
+
+#endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index d009f79..bf02dbd 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -111,7 +111,7 @@
/* The ARM override for dma_max_pfn() */
static inline unsigned long dma_max_pfn(struct device *dev)
{
- return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask);
+ return dma_to_pfn(dev, *dev->dma_mask);
}
#define dma_max_pfn(dev) dma_max_pfn(dev)
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 40ecd5f..f676feb 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -88,6 +88,8 @@
return;
for_each_child_of_node(cpus, cpu) {
+ const __be32 *cell;
+ int prop_bytes;
u32 hwid;
if (of_node_cmp(cpu->type, "cpu"))
@@ -99,7 +101,8 @@
* properties is considered invalid to build the
* cpu_logical_map.
*/
- if (of_property_read_u32(cpu, "reg", &hwid)) {
+ cell = of_get_property(cpu, "reg", &prop_bytes);
+ if (!cell || prop_bytes < sizeof(*cell)) {
pr_debug(" * %s missing reg property\n",
cpu->full_name);
of_node_put(cpu);
@@ -107,10 +110,15 @@
}
/*
- * 8 MSBs must be set to 0 in the DT since the reg property
+ * Bits n:24 must be set to 0 in the DT since the reg property
* defines the MPIDR[23:0].
*/
- if (hwid & ~MPIDR_HWID_BITMASK) {
+ do {
+ hwid = be32_to_cpu(*cell++);
+ prop_bytes -= sizeof(*cell);
+ } while (!hwid && prop_bytes > 0);
+
+ if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) {
of_node_put(cpu);
return;
}
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 709ee1d..3f17594 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -218,7 +218,7 @@
}
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer);
+ frame_pointer, NULL);
if (err == -EBUSY) {
*parent = old;
return;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 1506385..b942349 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -596,12 +596,6 @@
.attrs = armv7_pmuv1_event_attrs,
};
-static const struct attribute_group *armv7_pmuv1_attr_groups[] = {
- &armv7_pmuv1_events_attr_group,
- &armv7_pmu_format_attr_group,
- NULL,
-};
-
ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
@@ -653,12 +647,6 @@
.attrs = armv7_pmuv2_event_attrs,
};
-static const struct attribute_group *armv7_pmuv2_attr_groups[] = {
- &armv7_pmuv2_events_attr_group,
- &armv7_pmu_format_attr_group,
- NULL,
-};
-
/*
* Perf Events' indices
*/
@@ -1208,7 +1196,10 @@
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a8";
cpu_pmu->map_event = armv7_a8_map_event;
- cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1217,7 +1208,10 @@
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a9";
cpu_pmu->map_event = armv7_a9_map_event;
- cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1226,7 +1220,10 @@
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a5";
cpu_pmu->map_event = armv7_a5_map_event;
- cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv1_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1236,7 +1233,10 @@
cpu_pmu->name = "armv7_cortex_a15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1246,7 +1246,10 @@
cpu_pmu->name = "armv7_cortex_a7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1256,7 +1259,10 @@
cpu_pmu->name = "armv7_cortex_a12";
cpu_pmu->map_event = armv7_a12_map_event;
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return armv7_probe_num_events(cpu_pmu);
}
@@ -1264,7 +1270,10 @@
{
int ret = armv7_a12_pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a17";
- cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv7_pmuv2_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv7_pmu_format_attr_group;
return ret;
}
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 994e971..a0affd1 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -270,7 +270,7 @@
if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
return false;
- if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
+ if (!tk->tkr_mono.clock->archdata.vdso_direct)
return false;
return true;
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 0c47543..369f95a 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -322,34 +322,25 @@
#endif
#ifdef CONFIG_HOTPLUG_CPU
-static int irq_cpu_hotplug_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int omap_wakeupgen_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)hcpu;
-
- /*
- * Corresponding FROZEN transitions do not have to be handled,
- * they are handled by at a higher level
- * (drivers/cpuidle/coupled.c).
- */
- switch (action) {
- case CPU_ONLINE:
- wakeupgen_irqmask_all(cpu, 0);
- break;
- case CPU_DEAD:
- wakeupgen_irqmask_all(cpu, 1);
- break;
- }
- return NOTIFY_OK;
+ wakeupgen_irqmask_all(cpu, 0);
+ return 0;
}
-static struct notifier_block irq_hotplug_notifier = {
- .notifier_call = irq_cpu_hotplug_notify,
-};
+static int omap_wakeupgen_cpu_dead(unsigned int cpu)
+{
+ wakeupgen_irqmask_all(cpu, 1);
+ return 0;
+}
static void __init irq_hotplug_init(void)
{
- register_hotcpu_notifier(&irq_hotplug_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "arm/omap-wake:online",
+ omap_wakeupgen_cpu_online, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_ARM_OMAP_WAKE_DEAD,
+ "arm/omap-wake:dead", NULL,
+ omap_wakeupgen_cpu_dead);
}
#else
static void __init irq_hotplug_init(void)
diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
index 8d478f1..d1ecaf3 100644
--- a/arch/arm/mach-shmobile/platsmp-scu.c
+++ b/arch/arm/mach-shmobile/platsmp-scu.c
@@ -21,26 +21,14 @@
static phys_addr_t shmobile_scu_base_phys;
static void __iomem *shmobile_scu_base;
-static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int shmobile_scu_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
-
- switch (action) {
- case CPU_UP_PREPARE:
- /* For this particular CPU register SCU SMP boot vector */
- shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
- shmobile_scu_base_phys);
- break;
- };
-
- return NOTIFY_OK;
+ /* For this particular CPU register SCU SMP boot vector */
+ shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
+ shmobile_scu_base_phys);
+ return 0;
}
-static struct notifier_block shmobile_smp_scu_notifier = {
- .notifier_call = shmobile_smp_scu_notifier_call,
-};
-
void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys,
unsigned int max_cpus)
{
@@ -54,7 +42,9 @@
scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
/* Use CPU notifier for reset vector control */
- register_cpu_notifier(&shmobile_smp_scu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_ARM_SHMOBILE_SCU_PREPARE,
+ "arm/shmobile-scu:prepare",
+ shmobile_scu_cpu_prepare, NULL);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bc3f00f..7d6bcf6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -4,6 +4,8 @@
select ACPI_GENERIC_GSI if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
+ select ACPI_SPCR_TABLE if ACPI
+ select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -102,10 +104,8 @@
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
- select OF_NUMA if NUMA && OF
select OF_RESERVED_MEM
select PCI_ECAM if ACPI
- select PERF_USE_VMALLOC
select POWER_RESET
select POWER_SUPPLY
select SPARSE_IRQ
@@ -122,6 +122,9 @@
config MMU
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config ARM64_PAGE_SHIFT
int
default 16 if ARM64_64K_PAGES
@@ -415,18 +418,13 @@
config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
- depends on MODULES
default y
- select ARM64_MODULE_CMODEL_LARGE
+ select ARM64_MODULE_CMODEL_LARGE if MODULES
help
- This option builds kernel modules using the large memory model in
- order to avoid the use of the ADRP instruction, which can cause
- a subsequent memory access to use an incorrect address on Cortex-A53
- parts up to r0p4.
-
- Note that the kernel itself must be linked with a version of ld
- which fixes potentially affected ADRP instructions through the
- use of veneers.
+ This option links the kernel with '--fix-cortex-a53-843419' and
+ builds modules using the large memory model in order to avoid the use
+ of the ADRP instruction, which can cause a subsequent memory access
+ to use an incorrect address on Cortex-A53 parts up to r0p4.
If unsure, say Y.
@@ -582,7 +580,8 @@
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
- depends on SMP
+ select ACPI_NUMA if ACPI
+ select OF_NUMA
help
Enable NUMA (Non Uniform Memory Access) support.
@@ -603,11 +602,18 @@
def_bool y
depends on NUMA
+config HAVE_SETUP_PER_CPU_AREA
+ def_bool y
+ depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ def_bool y
+ depends on NUMA
+
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on !HIBERNATION
def_bool y
config ARCH_HAS_HOLES_MEMORYMODEL
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 0cc758c..b661fe7 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -49,16 +49,6 @@
If in doubt, say Y.
-config DEBUG_RODATA
- bool "Make kernel text and rodata read-only"
- default y
- help
- If this is set, kernel text and rodata will be made read-only. This
- is to help catch accidental or malicious attempts to change the
- kernel's executable code.
-
- If in doubt, say Y.
-
config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA
bool "Align linker sections up to SECTION_SIZE"
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index be5d824..a8c77c7 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -93,6 +93,7 @@
select ARMADA_CP110_SYSCON
select ARMADA_37XX_CLK
select MVEBU_ODMI
+ select MVEBU_PIC
help
This enables support for Marvell EBU familly, including:
- Armada 3700 SoC Family
@@ -159,7 +160,6 @@
select CLKSRC_MMIO
select CLKSRC_OF
select GENERIC_CLOCKEVENTS
- select HAVE_CLK
select PINCTRL
select RESET_CONTROLLER
help
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 5b54f8c..ab51aed 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -18,6 +18,14 @@
LDFLAGS_vmlinux += -pie -Bsymbolic
endif
+ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
+ ifeq ($(call ld-option, --fix-cortex-a53-843419),)
+$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
+ else
+LDFLAGS_vmlinux += --fix-cortex-a53-843419
+ endif
+endif
+
KBUILD_DEFCONFIG := defconfig
# Check for binutils support for specific extensions
@@ -38,10 +46,12 @@
KBUILD_CPPFLAGS += -mbig-endian
AS += -EB
LD += -EB
+UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
AS += -EL
LD += -EL
+UTS_MACHINE := aarch64
endif
CHECKFLAGS += -D__aarch64__
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5c88804..6b2aa0f 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -216,7 +216,7 @@
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
- if (nbytes) {
+ if (walk.nbytes % AES_BLOCK_SIZE) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index f43d2c4..44e1d7f 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,4 +1,3 @@
-generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
@@ -10,7 +9,6 @@
generic-y += early_ioremap.h
generic-y += emergency-restart.h
generic-y += errno.h
-generic-y += ftrace.h
generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
@@ -27,12 +25,10 @@
generic-y += msgbuf.h
generic-y += msi.h
generic-y += mutex.h
-generic-y += pci.h
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
-generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
@@ -45,7 +41,6 @@
generic-y += switch_to.h
generic-y += termbits.h
generic-y += termios.h
-generic-y += topology.h
generic-y += trace_clock.h
generic-y += types.h
generic-y += unaligned.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 5420cb0..e517088 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -12,7 +12,7 @@
#ifndef _ASM_ACPI_H
#define _ASM_ACPI_H
-#include <linux/mm.h>
+#include <linux/memblock.h>
#include <linux/psci.h>
#include <asm/cputype.h>
@@ -32,7 +32,11 @@
static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
acpi_size size)
{
- if (!page_is_ram(phys >> PAGE_SHIFT))
+ /*
+ * EFI's reserve_regions() call adds memory with the WB attribute
+ * to memblock via early_init_dt_add_memory_arch().
+ */
+ if (!memblock_is_memory(phys))
return ioremap(phys, size);
return ioremap_cache(phys, size);
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 8746ff6..55101bd 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,6 +2,7 @@
#define __ASM_ALTERNATIVE_H
#include <asm/cpufeature.h>
+#include <asm/insn.h>
#ifndef __ASSEMBLY__
@@ -90,24 +91,15 @@
.endm
/*
- * Begin an alternative code sequence.
+ * Alternative sequences
*
- * The code that follows this macro will be assembled and linked as
- * normal. There are no restrictions on this code.
- */
-.macro alternative_if_not cap
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
- .popsection
-661:
-.endm
-
-/*
- * Provide the alternative code sequence.
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
*
- * The code that follows this macro is assembled into a special
- * section to be used for dynamic patching. Code that follows this
- * macro must:
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
*
* 1. Be exactly the same length (in bytes) as the default code
* sequence.
@@ -116,8 +108,38 @@
* alternative sequence it is defined in (branches into an
* alternative sequence are not fixed up).
*/
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+ .popsection
+661:
+.endm
+
+.macro alternative_if cap
+ .set .Lasm_alt_mode, 1
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+ .popsection
+ .pushsection .altinstr_replacement, "ax"
+ .align 2 /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
.macro alternative_else
-662: .pushsection .altinstr_replacement, "ax"
+662:
+ .if .Lasm_alt_mode==0
+ .pushsection .altinstr_replacement, "ax"
+ .else
+ .popsection
+ .endif
663:
.endm
@@ -125,11 +147,25 @@
* Complete an alternative code sequence.
*/
.macro alternative_endif
-664: .popsection
+664:
+ .if .Lasm_alt_mode==0
+ .popsection
+ .endif
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.endm
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+ nops (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8ec88e5..fc2a0cb 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -28,6 +28,7 @@
#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3)
#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
@@ -165,6 +166,11 @@
isb();
}
+static inline void gic_write_bpr1(u32 val)
+{
+ asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
+}
+
#define gic_read_typer(c) readq_relaxed(c)
#define gic_write_irouter(v, c) writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index fbe0ca3..eaa5bbe 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -20,13 +20,55 @@
#define __ASM_ARCH_TIMER_H
#include <asm/barrier.h>
+#include <asm/sysreg.h>
#include <linux/bug.h>
#include <linux/init.h>
+#include <linux/jump_label.h>
#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>
+#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585)
+extern struct static_key_false arch_timer_read_ool_enabled;
+#define needs_fsl_a008585_workaround() \
+ static_branch_unlikely(&arch_timer_read_ool_enabled)
+#else
+#define needs_fsl_a008585_workaround() false
+#endif
+
+u32 __fsl_a008585_read_cntp_tval_el0(void);
+u32 __fsl_a008585_read_cntv_tval_el0(void);
+u64 __fsl_a008585_read_cntvct_el0(void);
+
+/*
+ * The number of retries is an arbitrary value well beyond the highest number
+ * of iterations the loop has been observed to take.
+ */
+#define __fsl_a008585_read_reg(reg) ({ \
+ u64 _old, _new; \
+ int _retries = 200; \
+ \
+ do { \
+ _old = read_sysreg(reg); \
+ _new = read_sysreg(reg); \
+ _retries--; \
+ } while (unlikely(_old != _new) && _retries); \
+ \
+ WARN_ON_ONCE(!_retries); \
+ _new; \
+})
+
+#define arch_timer_reg_read_stable(reg) \
+({ \
+ u64 _val; \
+ if (needs_fsl_a008585_workaround()) \
+ _val = __fsl_a008585_read_##reg(); \
+ else \
+ _val = read_sysreg(reg); \
+ _val; \
+})
+
/*
* These register accessors are marked inline so the compiler can
* nicely work out which register we want, and chuck away the rest of
@@ -38,19 +80,19 @@
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("msr cntp_ctl_el0, %0" : : "r" (val));
+ write_sysreg(val, cntp_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
- asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
+ write_sysreg(val, cntp_tval_el0);
break;
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("msr cntv_ctl_el0, %0" : : "r" (val));
+ write_sysreg(val, cntv_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
- asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
+ write_sysreg(val, cntv_tval_el0);
break;
}
}
@@ -61,48 +103,38 @@
static __always_inline
u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
- u32 val;
-
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
- break;
+ return read_sysreg(cntp_ctl_el0);
case ARCH_TIMER_REG_TVAL:
- asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
- break;
+ return arch_timer_reg_read_stable(cntp_tval_el0);
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
- break;
+ return read_sysreg(cntv_ctl_el0);
case ARCH_TIMER_REG_TVAL:
- asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
- break;
+ return arch_timer_reg_read_stable(cntv_tval_el0);
}
}
- return val;
+ BUG();
}
static inline u32 arch_timer_get_cntfrq(void)
{
- u32 val;
- asm volatile("mrs %0, cntfrq_el0" : "=r" (val));
- return val;
+ return read_sysreg(cntfrq_el0);
}
static inline u32 arch_timer_get_cntkctl(void)
{
- u32 cntkctl;
- asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl));
- return cntkctl;
+ return read_sysreg(cntkctl_el1);
}
static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
- asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl));
+ write_sysreg(cntkctl, cntkctl_el1);
}
static inline u64 arch_counter_get_cntpct(void)
@@ -116,12 +148,8 @@
static inline u64 arch_counter_get_cntvct(void)
{
- u64 cval;
-
isb();
- asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
-
- return cval;
+ return arch_timer_reg_read_stable(cntvct_el0);
}
static inline int arch_timer_arch_init(void)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d5025c6..28bfe61 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -87,6 +87,15 @@
.endm
/*
+ * NOP sequence
+ */
+ .macro nops, num
+ .rept \num
+ nop
+ .endr
+ .endm
+
+/*
* Emit an entry into the exception table
*/
.macro _asm_extable, from, to
@@ -216,11 +225,26 @@
.macro mmid, rd, rn
ldr \rd, [\rn, #MM_CONTEXT_ID]
.endm
+/*
+ * read_ctr - read CTR_EL0. If the system has mismatched
+ * cache line sizes, provide the system wide safe value
+ * from arm64_ftr_reg_ctrel0.sys_val
+ */
+ .macro read_ctr, reg
+alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE
+ mrs \reg, ctr_el0 // read CTR
+ nop
+alternative_else
+ ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
+alternative_endif
+ .endm
+
/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ * raw_dcache_line_size - get the minimum D-cache line size on this CPU
+ * from the CTR register.
*/
- .macro dcache_line_size, reg, tmp
+ .macro raw_dcache_line_size, reg, tmp
mrs \tmp, ctr_el0 // read CTR
ubfm \tmp, \tmp, #16, #19 // cache line size encoding
mov \reg, #4 // bytes per word
@@ -228,9 +252,20 @@
.endm
/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
+ * dcache_line_size - get the safe D-cache line size across all CPUs
*/
- .macro icache_line_size, reg, tmp
+ .macro dcache_line_size, reg, tmp
+ read_ctr \tmp
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * raw_icache_line_size - get the minimum I-cache line size on this CPU
+ * from the CTR register.
+ */
+ .macro raw_icache_line_size, reg, tmp
mrs \tmp, ctr_el0 // read CTR
and \tmp, \tmp, #0xf // cache line size encoding
mov \reg, #4 // bytes per word
@@ -238,6 +273,16 @@
.endm
/*
+ * icache_line_size - get the safe I-cache line size across all CPUs
+ */
+ .macro icache_line_size, reg, tmp
+ read_ctr \tmp
+ and \tmp, \tmp, #0xf // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
* tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
*/
.macro tcr_set_idmap_t0sz, valreg, tmpreg
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index b5890be..7457ce0 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -86,8 +86,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC(add_return##name), \
+ __LL_SC_ATOMIC(add_return##name) \
+ __nops(1), \
/* LSE atomics */ \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
@@ -112,8 +112,8 @@
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC(and),
+ __LL_SC_ATOMIC(and)
+ __nops(1),
/* LSE atomics */
" mvn %w[i], %w[i]\n"
" stclr %w[i], %[v]")
@@ -130,8 +130,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC(fetch_and##name), \
+ __LL_SC_ATOMIC(fetch_and##name) \
+ __nops(1), \
/* LSE atomics */ \
" mvn %w[i], %w[i]\n" \
" ldclr" #mb " %w[i], %w[i], %[v]") \
@@ -156,8 +156,8 @@
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC(sub),
+ __LL_SC_ATOMIC(sub)
+ __nops(1),
/* LSE atomics */
" neg %w[i], %w[i]\n"
" stadd %w[i], %[v]")
@@ -174,9 +174,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
__LL_SC_ATOMIC(sub_return##name) \
- " nop", \
+ __nops(2), \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], w30, %[v]\n" \
@@ -203,8 +202,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC(fetch_sub##name), \
+ __LL_SC_ATOMIC(fetch_sub##name) \
+ __nops(1), \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[i], %[v]") \
@@ -284,8 +283,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC64(add_return##name), \
+ __LL_SC_ATOMIC64(add_return##name) \
+ __nops(1), \
/* LSE atomics */ \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
@@ -310,8 +309,8 @@
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC64(and),
+ __LL_SC_ATOMIC64(and)
+ __nops(1),
/* LSE atomics */
" mvn %[i], %[i]\n"
" stclr %[i], %[v]")
@@ -328,8 +327,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC64(fetch_and##name), \
+ __LL_SC_ATOMIC64(fetch_and##name) \
+ __nops(1), \
/* LSE atomics */ \
" mvn %[i], %[i]\n" \
" ldclr" #mb " %[i], %[i], %[v]") \
@@ -354,8 +353,8 @@
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
- " nop\n"
- __LL_SC_ATOMIC64(sub),
+ __LL_SC_ATOMIC64(sub)
+ __nops(1),
/* LSE atomics */
" neg %[i], %[i]\n"
" stadd %[i], %[v]")
@@ -372,9 +371,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
__LL_SC_ATOMIC64(sub_return##name) \
- " nop", \
+ __nops(2), \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \
@@ -401,8 +399,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_ATOMIC64(fetch_sub##name), \
+ __LL_SC_ATOMIC64(fetch_sub##name) \
+ __nops(1), \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %[i], %[v]") \
@@ -426,13 +424,8 @@
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
- " nop\n"
__LL_SC_ATOMIC64(dec_if_positive)
- " nop\n"
- " nop\n"
- " nop\n"
- " nop\n"
- " nop",
+ __nops(6),
/* LSE atomics */
"1: ldr x30, %[v]\n"
" subs %[ret], x30, #1\n"
@@ -464,9 +457,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- __LL_SC_CMPXCHG(name) \
- " nop", \
+ __LL_SC_CMPXCHG(name) \
+ __nops(2), \
/* LSE atomics */ \
" mov " #w "30, %" #w "[old]\n" \
" cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \
@@ -517,10 +509,8 @@
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- " nop\n" \
- " nop\n" \
- " nop\n" \
- __LL_SC_CMPXCHG_DBL(name), \
+ __LL_SC_CMPXCHG_DBL(name) \
+ __nops(3), \
/* LSE atomics */ \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
" eor %[old1], %[old1], %[oldval1]\n" \
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 4eea7f6..4e0497f 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -20,6 +20,9 @@
#ifndef __ASSEMBLY__
+#define __nops(n) ".rept " #n "\nnop\n.endr\n"
+#define nops(n) asm volatile(__nops(n))
+
#define sev() asm volatile("sev" : : : "memory")
#define wfe() asm volatile("wfe" : : : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c64268d..2e5fb97 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_poc(void *addr, size_t len);
extern void __clean_dcache_area_pou(void *addr, size_t len);
extern long __flush_cache_user_range(unsigned long start, unsigned long end);
@@ -85,7 +86,7 @@
*/
extern void __dma_map_area(const void *, size_t, int);
extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_range(const void *, const void *);
+extern void __dma_flush_area(const void *, size_t);
/*
* Copy user data from/to a page which is mapped into a different
diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h
new file mode 100644
index 0000000..0b350a7
--- /dev/null
+++ b/arch/arm64/include/asm/clocksource.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+struct arch_clocksource_data {
+ bool vdso_direct; /* Usable for direct VDSO access? */
+};
+
+#endif
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index bd86a79..91b26d2 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -43,10 +43,8 @@
" cbnz %w1, 1b\n" \
" " #mb, \
/* LSE atomics */ \
- " nop\n" \
- " nop\n" \
" swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
- " nop\n" \
+ __nops(3) \
" " #nop_lse) \
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \
: "r" (x) \
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7099f26..758d74f 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -9,6 +9,8 @@
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
+#include <linux/jump_label.h>
+
#include <asm/hwcap.h>
#include <asm/sysreg.h>
@@ -37,8 +39,9 @@
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
#define ARM64_HYP_OFFSET_LOW 14
+#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
-#define ARM64_NCAPS 15
+#define ARM64_NCAPS 16
#ifndef __ASSEMBLY__
@@ -63,7 +66,7 @@
enum ftr_type type;
u8 shift;
u8 width;
- s64 safe_val; /* safe value for discrete features */
+ s64 safe_val; /* safe value for FTR_EXACT features */
};
/*
@@ -72,13 +75,14 @@
* @sys_val Safe value across the CPUs (system view)
*/
struct arm64_ftr_reg {
- u32 sys_id;
- const char *name;
- u64 strict_mask;
- u64 sys_val;
- struct arm64_ftr_bits *ftr_bits;
+ const char *name;
+ u64 strict_mask;
+ u64 sys_val;
+ const struct arm64_ftr_bits *ftr_bits;
};
+extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
+
/* scope of capability check */
enum {
SCOPE_SYSTEM,
@@ -109,6 +113,7 @@
};
extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
bool this_cpu_has_cap(unsigned int cap);
@@ -121,16 +126,21 @@
{
if (num >= ARM64_NCAPS)
return false;
- return test_bit(num, cpu_hwcaps);
+ if (__builtin_constant_p(num))
+ return static_branch_unlikely(&cpu_hwcap_keys[num]);
+ else
+ return test_bit(num, cpu_hwcaps);
}
static inline void cpus_set_cap(unsigned int num)
{
- if (num >= ARM64_NCAPS)
+ if (num >= ARM64_NCAPS) {
pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
num, ARM64_NCAPS);
- else
+ } else {
__set_bit(num, cpu_hwcaps);
+ static_branch_enable(&cpu_hwcap_keys[num]);
+ }
}
static inline int __attribute_const__
@@ -157,7 +167,7 @@
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
-static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
@@ -170,7 +180,7 @@
cpuid_feature_extract_unsigned_field(features, field);
}
-static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
{
return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign);
}
@@ -193,11 +203,11 @@
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps);
-void check_local_cpu_errata(void);
-void __init enable_errata_workarounds(void);
+void check_local_cpu_capabilities(void);
-void verify_local_cpu_errata(void);
-void verify_local_cpu_capabilities(void);
+void update_cpu_errata_workarounds(void);
+void __init enable_errata_workarounds(void);
+void verify_local_cpu_errata_workarounds(void);
u64 read_system_reg(u32 id);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 9d9fd4b..26a68dd 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -93,11 +93,7 @@
#include <asm/sysreg.h>
-#define read_cpuid(reg) ({ \
- u64 __val; \
- asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \
- __val; \
-})
+#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
/*
* The CPU ID never changes at run time, so we might as well tell the
diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h
index 65e0190..836b056 100644
--- a/arch/arm64/include/asm/dcc.h
+++ b/arch/arm64/include/asm/dcc.h
@@ -21,21 +21,16 @@
#define __ASM_DCC_H
#include <asm/barrier.h>
+#include <asm/sysreg.h>
static inline u32 __dcc_getstatus(void)
{
- u32 ret;
-
- asm volatile("mrs %0, mdccsr_el0" : "=r" (ret));
-
- return ret;
+ return read_sysreg(mdccsr_el0);
}
static inline char __dcc_getchar(void)
{
- char c;
-
- asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c));
+ char c = read_sysreg(dbgdtrrx_el0);
isb();
return c;
@@ -47,8 +42,7 @@
* The typecast is to make absolutely certain that 'c' is
* zero-extended.
*/
- asm volatile("msr dbgdtrtx_el0, %0"
- : : "r" ((unsigned long)(unsigned char)c));
+ write_sysreg((unsigned char)c, dbgdtrtx_el0);
isb();
}
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 4b6b3f7..b71420a 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -61,8 +61,6 @@
#define AARCH64_BREAK_KGDB_DYN_DBG \
(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
-#define KGDB_DYN_BRK_INS_BYTE(x) \
- ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff)
#define CACHE_FLUSH_IS_SAFE 1
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index f772e15..d14c478 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -78,6 +78,23 @@
#define ESR_ELx_IL (UL(1) << 25)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+
+/* ISS field definitions shared by different classes */
+#define ESR_ELx_WNR (UL(1) << 6)
+
+/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_EA (UL(1) << 9)
+#define ESR_ELx_S1PTW (UL(1) << 7)
+
+/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
+#define ESR_ELx_FSC (0x3F)
+#define ESR_ELx_FSC_TYPE (0x3C)
+#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_ACCESS (0x08)
+#define ESR_ELx_FSC_FAULT (0x04)
+#define ESR_ELx_FSC_PERM (0x0C)
+
+/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV (UL(1) << 24)
#define ESR_ELx_SAS_SHIFT (22)
#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
@@ -86,16 +103,9 @@
#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
#define ESR_ELx_SF (UL(1) << 15)
#define ESR_ELx_AR (UL(1) << 14)
-#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_CM (UL(1) << 8)
-#define ESR_ELx_S1PTW (UL(1) << 7)
-#define ESR_ELx_WNR (UL(1) << 6)
-#define ESR_ELx_FSC (0x3F)
-#define ESR_ELx_FSC_TYPE (0x3C)
-#define ESR_ELx_FSC_EXTABT (0x10)
-#define ESR_ELx_FSC_ACCESS (0x08)
-#define ESR_ELx_FSC_FAULT (0x04)
-#define ESR_ELx_FSC_PERM (0x0C)
+
+/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -109,6 +119,62 @@
((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \
((imm) & 0xffff))
+/* ISS field definitions for System instruction traps */
+#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22
+#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT)
+#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1
+#define ESR_ELx_SYS64_ISS_DIR_READ 0x1
+#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_SYS64_ISS_RT_SHIFT 5
+#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1
+#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10
+#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14
+#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17
+#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20
+#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT)
+#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_OP2_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_CRM_MASK)
+#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \
+ (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \
+ ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \
+ ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \
+ ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \
+ ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT))
+
+#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+/*
+ * User space cache operations have the following sysreg encoding
+ * in System instructions.
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
+ */
+#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
+#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5
+
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_OP2_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \
+ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_WRITE)
+
+#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
+#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 115ea2a..9510ace 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
#include <asm/virt.h>
#ifdef __KERNEL__
@@ -98,18 +99,18 @@
#define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP)
/* Debug register names. */
-#define AARCH64_DBG_REG_NAME_BVR "bvr"
-#define AARCH64_DBG_REG_NAME_BCR "bcr"
-#define AARCH64_DBG_REG_NAME_WVR "wvr"
-#define AARCH64_DBG_REG_NAME_WCR "wcr"
+#define AARCH64_DBG_REG_NAME_BVR bvr
+#define AARCH64_DBG_REG_NAME_BCR bcr
+#define AARCH64_DBG_REG_NAME_WVR wvr
+#define AARCH64_DBG_REG_NAME_WCR wcr
/* Accessor macros for the debug registers. */
#define AARCH64_DBG_READ(N, REG, VAL) do {\
- asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\
+ VAL = read_sysreg(dbg##REG##N##_el1);\
} while (0)
#define AARCH64_DBG_WRITE(N, REG, VAL) do {\
- asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\
+ write_sysreg(VAL, dbg##REG##N##_el1);\
} while (0)
struct task_struct;
@@ -141,8 +142,6 @@
}
#endif
-extern struct pmu perf_ops_bp;
-
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 1dbaa90..bc85366 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -246,7 +246,8 @@
static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
{ return (val); }
-__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000)
+__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
+__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
@@ -318,6 +319,11 @@
bool aarch64_insn_is_nop(u32 insn);
bool aarch64_insn_is_branch_imm(u32 insn);
+static inline bool aarch64_insn_is_adr_adrp(u32 insn)
+{
+ return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
+}
+
int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
@@ -398,6 +404,9 @@
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+s32 aarch64_insn_adrp_get_offset(u32 insn);
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
+
bool aarch32_insn_is_wide(u32 insn);
#define A32_RN_OFFSET 16
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9b6e408..0bba427 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -40,25 +40,25 @@
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
- asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr));
+ asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writew __raw_writew
static inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
- asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr));
+ asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writel __raw_writel
static inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
- asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr));
+ asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writeq __raw_writeq
static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
- asm volatile("str %0, [%1]" : : "r" (val), "r" (addr));
+ asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_readb __raw_readb
@@ -184,17 +184,6 @@
#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); })
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p) __va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#include <asm-generic/io.h>
/*
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b6bb834..dff1098 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -99,14 +99,10 @@
.macro kern_hyp_va reg
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else
- nop
-alternative_endif
-alternative_if_not ARM64_HYP_OFFSET_LOW
- nop
-alternative_else
+alternative_else_nop_endif
+alternative_if ARM64_HYP_OFFSET_LOW
and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_endif
+alternative_else_nop_endif
.endm
#else
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 31b7322..ba62df8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -214,7 +214,7 @@
#ifndef CONFIG_SPARSEMEM_VMEMMAP
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#else
#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
#define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
@@ -222,11 +222,15 @@
#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
-#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
+#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
+ PHYS_OFFSET) >> PAGE_SHIFT)
#endif
#endif
+#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET)
+#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \
+ _virt_addr_valid(kaddr))
+
#include <asm-generic/memory_model.h>
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b1892a0..a501853 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,22 +27,17 @@
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/pgtable.h>
+#include <asm/sysreg.h>
#include <asm/tlbflush.h>
-#ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next)
{
- asm(
- " msr contextidr_el1, %0\n"
- " isb"
- :
- : "r" (task_pid_nr(next)));
+ if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
+ return;
+
+ write_sysreg(task_pid_nr(next), contextidr_el1);
+ isb();
}
-#else
-static inline void contextidr_thread_switch(struct task_struct *next)
-{
-}
-#endif
/*
* Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
@@ -51,11 +46,8 @@
{
unsigned long ttbr = virt_to_phys(empty_zero_page);
- asm(
- " msr ttbr0_el1, %0 // set TTBR0\n"
- " isb"
- :
- : "r" (ttbr));
+ write_sysreg(ttbr, ttbr0_el1);
+ isb();
}
/*
@@ -81,13 +73,11 @@
if (!__cpu_uses_extended_idmap())
return;
- asm volatile (
- " mrs %0, tcr_el1 ;"
- " bfi %0, %1, %2, %3 ;"
- " msr tcr_el1, %0 ;"
- " isb"
- : "=&r" (tcr)
- : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+ tcr = read_sysreg(tcr_el1);
+ tcr &= ~TCR_T0SZ_MASK;
+ tcr |= t0sz << TCR_T0SZ_OFFSET;
+ write_sysreg(tcr, tcr_el1);
+ isb();
}
#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index c3ae239..eb0c2bd 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -208,6 +208,7 @@
#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
#define TCR_TxSZ_WIDTH 6
+#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
#define TCR_IRGN0_SHIFT 8
#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 39f5252..2142c77 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -70,12 +70,13 @@
#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
+#define __P100 PAGE_EXECONLY
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_COPY_EXEC
#define __P111 PAGE_COPY_EXEC
@@ -84,7 +85,7 @@
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
+#define __S100 PAGE_EXECONLY
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e20bd43..ffbb9a5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -73,7 +73,7 @@
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
-#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
+#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -84,8 +84,8 @@
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_not_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+#define pte_valid_global(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID)
#define pte_valid_young(pte) \
((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
@@ -155,6 +155,16 @@
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
+static inline pte_t pte_clear_rdonly(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
+static inline pte_t pte_mkpresent(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_VALID));
+}
+
static inline pmd_t pmd_mkcont(pmd_t pmd)
{
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
@@ -168,7 +178,7 @@
* Only if the new pte is valid and kernel, otherwise TLB maintenance
* or update_mmu_cache() have the necessary barriers.
*/
- if (pte_valid_not_user(pte)) {
+ if (pte_valid_global(pte)) {
dsb(ishst);
isb();
}
@@ -202,7 +212,7 @@
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
- if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+ if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
}
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ace0a96..df2e53d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -37,7 +37,6 @@
#include <asm/ptrace.h>
#include <asm/types.h>
-#ifdef __KERNEL__
#define STACK_TOP_MAX TASK_SIZE_64
#ifdef CONFIG_COMPAT
#define AARCH32_VECTORS_BASE 0xffff0000
@@ -49,7 +48,6 @@
extern phys_addr_t arm64_dma_phys_limit;
#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
-#endif /* __KERNEL__ */
struct debug_info {
/* Have we suspended stepping by a debugger? */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
new file mode 100644
index 0000000..4e7e706
--- /dev/null
+++ b/arch/arm64/include/asm/sections.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char __alt_instructions[], __alt_instructions_end[];
+extern char __exception_text_start[], __exception_text_end[];
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+extern char __hyp_text_start[], __hyp_text_end[];
+extern char __idmap_text_start[], __idmap_text_end[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __mmuoff_data_start[], __mmuoff_data_end[];
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 89206b5..cae331d 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -66,8 +66,7 @@
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" stxr %w1, %w0, %2\n"
-" nop\n"
-" nop\n",
+ __nops(2),
/* LSE atomics */
" mov %w1, %w0\n"
" cas %w0, %w0, %2\n"
@@ -99,9 +98,7 @@
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
-" nop\n"
-" nop\n"
-" nop\n"
+ __nops(3)
)
/* Did we get the lock? */
@@ -165,8 +162,8 @@
" stlrh %w1, %0",
/* LSE atomics */
" mov %w1, #1\n"
- " nop\n"
- " staddlh %w1, %0")
+ " staddlh %w1, %0\n"
+ __nops(1))
: "=Q" (lock->owner), "=&r" (tmp)
:
: "memory");
@@ -212,7 +209,7 @@
" cbnz %w0, 1b\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 2b\n"
- " nop",
+ __nops(1),
/* LSE atomics */
"1: mov %w0, wzr\n"
"2: casa %w0, %w2, %1\n"
@@ -241,8 +238,7 @@
/* LSE atomics */
" mov %w0, wzr\n"
" casa %w0, %w2, %1\n"
- " nop\n"
- " nop")
+ __nops(2))
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "memory");
@@ -290,8 +286,8 @@
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1b\n"
" stxr %w1, %w0, %2\n"
- " nop\n"
- " cbnz %w1, 2b",
+ " cbnz %w1, 2b\n"
+ __nops(1),
/* LSE atomics */
"1: wfe\n"
"2: ldxr %w0, %2\n"
@@ -317,9 +313,8 @@
" cbnz %w1, 1b",
/* LSE atomics */
" movn %w0, #0\n"
- " nop\n"
- " nop\n"
- " staddl %w0, %2")
+ " staddl %w0, %2\n"
+ __nops(2))
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "memory");
@@ -344,7 +339,7 @@
" tbnz %w1, #31, 1f\n"
" casa %w0, %w1, %2\n"
" sbc %w1, %w1, %w0\n"
- " nop\n"
+ __nops(1)
"1:")
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 024d623..b8a313f 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -47,4 +47,7 @@
int arch_hibernation_header_save(void *addr, unsigned int max_size);
int arch_hibernation_header_restore(void *addr);
+/* Used to resume on the CPU we hibernated on */
+int hibernate_resume_nonboot_cpu_disable(void);
+
#endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index cc06794..e8d46e8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -100,6 +100,7 @@
/* SCTLR_EL1 specific flags. */
#define SCTLR_EL1_UCI (1 << 26)
#define SCTLR_EL1_SPAN (1 << 23)
+#define SCTLR_EL1_UCT (1 << 15)
#define SCTLR_EL1_SED (1 << 8)
#define SCTLR_EL1_CP15BEN (1 << 5)
@@ -253,16 +254,6 @@
" .endm\n"
);
-static inline void config_sctlr_el1(u32 clear, u32 set)
-{
- u32 val;
-
- asm volatile("mrs %0, sctlr_el1" : "=r" (val));
- val &= ~clear;
- val |= set;
- asm volatile("msr sctlr_el1, %0" : : "r" (val));
-}
-
/*
* Unlike read_cpuid, calls to read_sysreg are never expected to be
* optimized away or replaced with synthetic values.
@@ -273,12 +264,41 @@
__val; \
})
+/*
+ * The "Z" constraint normally means a zero immediate, but when combined with
+ * the "%x0" template means XZR.
+ */
#define write_sysreg(v, r) do { \
u64 __val = (u64)v; \
- asm volatile("msr " __stringify(r) ", %0" \
- : : "r" (__val)); \
+ asm volatile("msr " __stringify(r) ", %x0" \
+ : : "rZ" (__val)); \
} while (0)
+/*
+ * For registers without architectural names, or simply unsupported by
+ * GAS.
+ */
+#define read_sysreg_s(r) ({ \
+ u64 __val; \
+ asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+#define write_sysreg_s(v, r) do { \
+ u64 __val = (u64)v; \
+ asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \
+} while (0)
+
+static inline void config_sctlr_el1(u32 clear, u32 set)
+{
+ u32 val;
+
+ val = read_sysreg(sctlr_el1);
+ val &= ~clear;
+ val |= set;
+ write_sysreg(val, sctlr_el1);
+}
+
#endif
#endif /* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 57f110b..bc81243 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -56,12 +56,6 @@
__show_ratelimited; \
})
-#define UDBG_UNDEFINED (1 << 0)
-#define UDBG_SYSCALL (1 << 1)
-#define UDBG_BADABORT (1 << 2)
-#define UDBG_SEGV (1 << 3)
-#define UDBG_BUS (1 << 4)
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index abd64bd..e9ea5a6 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -75,6 +75,9 @@
/*
* struct thread_info can be accessed directly via sp_el0.
+ *
+ * We don't use read_sysreg() as we want the compiler to cache the value where
+ * possible.
*/
static inline struct thread_info *current_thread_info(void)
{
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b460ae2..deab523 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -25,6 +25,24 @@
#include <asm/cputype.h>
/*
+ * Raw TLBI operations.
+ *
+ * Where necessary, use the __tlbi() macro to avoid asm()
+ * boilerplate. Drivers and most kernel code should use the TLB
+ * management routines in preference to the macro below.
+ *
+ * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
+ * on whether a particular TLBI operation takes an argument or
+ * not. The macros handles invoking the asm with or without the
+ * register argument as appropriate.
+ */
+#define __TLBI_0(op, arg) asm ("tlbi " #op)
+#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg))
+#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
+
+#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
+
+/*
* TLB Management
* ==============
*
@@ -66,7 +84,7 @@
static inline void local_flush_tlb_all(void)
{
dsb(nshst);
- asm("tlbi vmalle1");
+ __tlbi(vmalle1);
dsb(nsh);
isb();
}
@@ -74,7 +92,7 @@
static inline void flush_tlb_all(void)
{
dsb(ishst);
- asm("tlbi vmalle1is");
+ __tlbi(vmalle1is);
dsb(ish);
isb();
}
@@ -84,7 +102,7 @@
unsigned long asid = ASID(mm) << 48;
dsb(ishst);
- asm("tlbi aside1is, %0" : : "r" (asid));
+ __tlbi(aside1is, asid);
dsb(ish);
}
@@ -94,7 +112,7 @@
unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
dsb(ishst);
- asm("tlbi vale1is, %0" : : "r" (addr));
+ __tlbi(vale1is, addr);
dsb(ish);
}
@@ -122,9 +140,9 @@
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
if (last_level)
- asm("tlbi vale1is, %0" : : "r"(addr));
+ __tlbi(vale1is, addr);
else
- asm("tlbi vae1is, %0" : : "r"(addr));
+ __tlbi(vae1is, addr);
}
dsb(ish);
}
@@ -149,7 +167,7 @@
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- asm("tlbi vaae1is, %0" : : "r"(addr));
+ __tlbi(vaae1is, addr);
dsb(ish);
isb();
}
@@ -163,7 +181,7 @@
{
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
- asm("tlbi vae1is, %0" : : "r" (addr));
+ __tlbi(vae1is, addr);
dsb(ish);
}
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 9cd03f3..02e9035 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -19,6 +19,7 @@
#define __ASM_TRAP_H
#include <linux/list.h>
+#include <asm/sections.h>
struct pt_regs;
@@ -39,9 +40,6 @@
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
- extern char __irqentry_text_start[];
- extern char __irqentry_text_end[];
-
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
@@ -54,8 +52,6 @@
static inline int in_exception_text(unsigned long ptr)
{
- extern char __exception_text_start[];
- extern char __exception_text_end[];
int in;
in = ptr >= (unsigned long)&__exception_text_start &&
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 1788545..fea1073 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -45,6 +45,8 @@
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
/*
* __boot_cpu_mode records what mode CPUs were booted in.
@@ -75,10 +77,7 @@
static inline bool is_kernel_in_hyp_mode(void)
{
- u64 el;
-
- asm("mrs %0, CurrentEL" : "=r" (el));
- return el == CurrentEL_EL2;
+ return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
#ifdef CONFIG_ARM64_VHE
@@ -87,14 +86,6 @@
static inline void verify_cpu_run_el(void) {}
#endif
-/* The section containing the hypervisor idmap text */
-extern char __hyp_idmap_text_start[];
-extern char __hyp_idmap_text_end[];
-
-/* The section containing the hypervisor text */
-extern char __hyp_text_start[];
-extern char __hyp_text_end[];
-
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 14f7b65..7d66bba 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -10,6 +10,8 @@
CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
+CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
# Object file lists.
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 3e4f1a4..252a6d9 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -24,6 +24,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/smp.h>
+#include <linux/serial_core.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
@@ -206,7 +207,7 @@
if (param_acpi_off ||
(!param_acpi_on && !param_acpi_force &&
of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
- return;
+ goto done;
/*
* ACPI is disabled at this point. Enable it in order to parse
@@ -226,6 +227,14 @@
if (!param_acpi_force)
disable_acpi();
}
+
+done:
+ if (acpi_disabled) {
+ if (earlycon_init_is_deferred)
+ early_init_dt_scan_chosen_stdout();
+ } else {
+ parse_spcr(earlycon_init_is_deferred);
+ }
}
#ifdef CONFIG_ACPI_APEI
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index f85149c..f01fab6 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -105,8 +105,10 @@
int ret;
ret = acpi_numa_init();
- if (ret)
+ if (ret) {
+ pr_info("Failed to initialise from firmware\n");
return ret;
+ }
return srat_disabled() ? -EINVAL : 0;
}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2ee1b2..06d650f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -25,14 +25,13 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-
struct alt_region {
struct alt_instr *begin;
struct alt_instr *end;
@@ -59,6 +58,8 @@
BUG();
}
+#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
+
static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
{
u32 insn;
@@ -80,6 +81,25 @@
offset = target - (unsigned long)insnptr;
insn = aarch64_set_branch_offset(insn, offset);
}
+ } else if (aarch64_insn_is_adrp(insn)) {
+ s32 orig_offset, new_offset;
+ unsigned long target;
+
+ /*
+ * If we're replacing an adrp instruction, which uses PC-relative
+ * immediate addressing, adjust the offset to reflect the new
+ * PC. adrp operates on 4K aligned addresses.
+ */
+ orig_offset = aarch64_insn_adrp_get_offset(insn);
+ target = align_down(altinsnptr, SZ_4K) + orig_offset;
+ new_offset = target - align_down(insnptr, SZ_4K);
+ insn = aarch64_insn_adrp_set_offset(insn, new_offset);
+ } else if (aarch64_insn_uses_literal(insn)) {
+ /*
+ * Disallow patching unhandled instructions using PC relative
+ * literal addresses
+ */
+ BUG();
}
return insn;
@@ -124,8 +144,8 @@
{
static int patched = 0;
struct alt_region region = {
- .begin = __alt_instructions,
- .end = __alt_instructions_end,
+ .begin = (struct alt_instr *)__alt_instructions,
+ .end = (struct alt_instr *)__alt_instructions_end,
};
/* We always have a CPU 0 at this point (__init) */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 05070b7..4a2f0f0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
#include <linux/suspend.h>
+#include <asm/cpufeature.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -145,5 +146,6 @@
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
+ DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
return 0;
}
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index b8629d5..9617301 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -39,7 +39,7 @@
if (level > MAX_CACHE_LEVEL)
return CACHE_TYPE_NOCACHE;
- asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr));
+ clidr = read_sysreg(clidr_el1);
return CLIDR_CTYPE(clidr, level);
}
@@ -55,11 +55,9 @@
WARN_ON(preemptible());
- /* Put value into CSSELR */
- asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+ write_sysreg(csselr, csselr_el1);
isb();
- /* Read result out of CCSIDR */
- asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr));
+ ccsidr = read_sysreg(ccsidr_el1);
return ccsidr;
}
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 82b0fc2..0150394 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -30,6 +30,21 @@
entry->midr_range_max);
}
+static bool
+has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) !=
+ (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
+}
+
+static void cpu_enable_trap_ctr_access(void *__unused)
+{
+ /* Clear SCTLR_EL1.UCT */
+ config_sctlr_el1(SCTLR_EL1_UCT, 0);
+}
+
#define MIDR_RANGE(model, min, max) \
.def_scope = SCOPE_LOCAL_CPU, \
.matches = is_affected_midr_range, \
@@ -108,6 +123,13 @@
},
#endif
{
+ .desc = "Mismatched cache line size",
+ .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
+ .matches = has_mismatched_cache_line_size,
+ .def_scope = SCOPE_LOCAL_CPU,
+ .enable = cpu_enable_trap_ctr_access,
+ },
+ {
}
};
@@ -116,7 +138,7 @@
* and the related information is freed soon after. If the new CPU requires
* an errata not detected at boot, fail this CPU.
*/
-void verify_local_cpu_errata(void)
+void verify_local_cpu_errata_workarounds(void)
{
const struct arm64_cpu_capabilities *caps = arm64_errata;
@@ -131,7 +153,7 @@
}
}
-void check_local_cpu_errata(void)
+void update_cpu_errata_workarounds(void)
{
update_cpu_capabilities(arm64_errata, "enabling workaround for");
}
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index c7cfb8f..e137cea 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -17,6 +17,7 @@
*/
#include <linux/acpi.h>
+#include <linux/cache.h>
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
@@ -28,7 +29,7 @@
extern const struct cpu_operations acpi_parking_protocol_ops;
extern const struct cpu_operations cpu_psci_ops;
-const struct cpu_operations *cpu_ops[NR_CPUS];
+const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 62272ea..d577f26 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -46,6 +46,9 @@
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcap_keys);
+
#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
.sign = SIGNED, \
@@ -74,7 +77,7 @@
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
-static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
@@ -87,7 +90,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
@@ -101,7 +104,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
@@ -119,7 +122,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
@@ -130,7 +133,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
@@ -139,7 +142,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_ctr[] = {
+static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
@@ -147,15 +150,21 @@
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
- * make use of *minLine
+ * make use of *minLine.
+ * If we have differing I-cache policies, report it as the weakest - AIVIVT.
*/
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
+ .name = "SYS_CTR_EL0",
+ .ftr_bits = ftr_ctr
+};
+
+static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
@@ -167,7 +176,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -178,14 +187,14 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_mvfr2[] = {
+static const struct arm64_ftr_bits ftr_mvfr2[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_dczid[] = {
+static const struct arm64_ftr_bits ftr_dczid[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
@@ -193,7 +202,7 @@
};
-static struct arm64_ftr_bits ftr_id_isar5[] = {
+static const struct arm64_ftr_bits ftr_id_isar5[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
@@ -204,14 +213,14 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_pfr0[] = {
+static const struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
@@ -220,7 +229,7 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_id_dfr0[] = {
+static const struct arm64_ftr_bits ftr_id_dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
@@ -238,7 +247,7 @@
* 0. Covers the following 32bit registers:
* id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
*/
-static struct arm64_ftr_bits ftr_generic_32bits[] = {
+static const struct arm64_ftr_bits ftr_generic_32bits[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
@@ -250,29 +259,32 @@
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_generic[] = {
+static const struct arm64_ftr_bits ftr_generic[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_generic32[] = {
+static const struct arm64_ftr_bits ftr_generic32[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
ARM64_FTR_END,
};
-static struct arm64_ftr_bits ftr_aa64raz[] = {
+static const struct arm64_ftr_bits ftr_aa64raz[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
ARM64_FTR_END,
};
-#define ARM64_FTR_REG(id, table) \
- { \
- .sys_id = id, \
+#define ARM64_FTR_REG(id, table) { \
+ .sys_id = id, \
+ .reg = &(struct arm64_ftr_reg){ \
.name = #id, \
.ftr_bits = &((table)[0]), \
- }
+ }}
-static struct arm64_ftr_reg arm64_ftr_regs[] = {
+static const struct __ftr_reg_entry {
+ u32 sys_id;
+ struct arm64_ftr_reg *reg;
+} arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
@@ -315,7 +327,7 @@
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 3, CRn = 0, CRm = 0 */
- ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
/* Op1 = 3, CRn = 14, CRm = 0 */
@@ -324,7 +336,7 @@
static int search_cmp_ftr_reg(const void *id, const void *regp)
{
- return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+ return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id;
}
/*
@@ -339,14 +351,20 @@
*/
static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
{
- return bsearch((const void *)(unsigned long)sys_id,
+ const struct __ftr_reg_entry *ret;
+
+ ret = bsearch((const void *)(unsigned long)sys_id,
arm64_ftr_regs,
ARRAY_SIZE(arm64_ftr_regs),
sizeof(arm64_ftr_regs[0]),
search_cmp_ftr_reg);
+ if (ret)
+ return ret->reg;
+ return NULL;
}
-static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
+ s64 ftr_val)
{
u64 mask = arm64_ftr_mask(ftrp);
@@ -355,7 +373,8 @@
return reg;
}
-static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
+ s64 cur)
{
s64 ret = 0;
@@ -376,27 +395,13 @@
return ret;
}
-static int __init sort_cmp_ftr_regs(const void *a, const void *b)
-{
- return ((const struct arm64_ftr_reg *)a)->sys_id -
- ((const struct arm64_ftr_reg *)b)->sys_id;
-}
-
-static void __init swap_ftr_regs(void *a, void *b, int size)
-{
- struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
- *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
- *(struct arm64_ftr_reg *)b = tmp;
-}
-
static void __init sort_ftr_regs(void)
{
- /* Keep the array sorted so that we can do the binary search */
- sort(arm64_ftr_regs,
- ARRAY_SIZE(arm64_ftr_regs),
- sizeof(arm64_ftr_regs[0]),
- sort_cmp_ftr_regs,
- swap_ftr_regs);
+ int i;
+
+ /* Check that the array is sorted so that we can do the binary search */
+ for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
+ BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
}
/*
@@ -407,7 +412,7 @@
{
u64 val = 0;
u64 strict_mask = ~0x0ULL;
- struct arm64_ftr_bits *ftrp;
+ const struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
BUG_ON(!reg);
@@ -464,7 +469,7 @@
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
{
- struct arm64_ftr_bits *ftrp;
+ const struct arm64_ftr_bits *ftrp;
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
@@ -1004,25 +1009,35 @@
* cannot do anything to fix it up and could cause unexpected failures. So
* we park the CPU.
*/
-void verify_local_cpu_capabilities(void)
+static void verify_local_cpu_capabilities(void)
{
-
- check_early_cpu_features();
-
- /*
- * If we haven't computed the system capabilities, there is nothing
- * to verify.
- */
- if (!sys_caps_initialised)
- return;
-
- verify_local_cpu_errata();
+ verify_local_cpu_errata_workarounds();
verify_local_cpu_features(arm64_features);
verify_local_elf_hwcaps(arm64_elf_hwcaps);
if (system_supports_32bit_el0())
verify_local_elf_hwcaps(compat_elf_hwcaps);
}
+void check_local_cpu_capabilities(void)
+{
+ /*
+ * All secondary CPUs should conform to the early CPU features
+ * in use by the kernel based on boot CPU.
+ */
+ check_early_cpu_features();
+
+ /*
+ * If we haven't finalised the system capabilities, this CPU gets
+ * a chance to update the errata work arounds.
+ * Otherwise, this CPU should verify that it has all the system
+ * advertised capabilities.
+ */
+ if (!sys_caps_initialised)
+ update_cpu_errata_workarounds();
+ else
+ verify_local_cpu_capabilities();
+}
+
static void __init setup_feature_capabilities(void)
{
update_cpu_capabilities(arm64_features, "detected feature:");
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index ed1b84f..b3d5b3e 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -363,8 +363,6 @@
}
cpuinfo_detect_icache_policy(info);
-
- check_local_cpu_errata();
}
void cpuinfo_store_cpu(void)
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 91fff48..73ae90e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -46,16 +46,14 @@
{
unsigned long flags;
local_dbg_save(flags);
- asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
+ write_sysreg(mdscr, mdscr_el1);
local_dbg_restore(flags);
}
NOKPROBE_SYMBOL(mdscr_write);
static u32 mdscr_read(void)
{
- u32 mdscr;
- asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
- return mdscr;
+ return read_sysreg(mdscr_el1);
}
NOKPROBE_SYMBOL(mdscr_read);
@@ -132,36 +130,18 @@
/*
* OS lock clearing.
*/
-static void clear_os_lock(void *unused)
+static int clear_os_lock(unsigned int cpu)
{
- asm volatile("msr oslar_el1, %0" : : "r" (0));
+ write_sysreg(0, oslar_el1);
+ isb();
+ return 0;
}
-static int os_lock_notify(struct notifier_block *self,
- unsigned long action, void *data)
-{
- if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
- clear_os_lock(NULL);
- return NOTIFY_OK;
-}
-
-static struct notifier_block os_lock_nb = {
- .notifier_call = os_lock_notify,
-};
-
static int debug_monitors_init(void)
{
- cpu_notifier_register_begin();
-
- /* Clear the OS lock. */
- on_each_cpu(clear_os_lock, NULL, 1);
- isb();
-
- /* Register hotplug handler. */
- __register_cpu_notifier(&os_lock_nb);
-
- cpu_notifier_register_done();
- return 0;
+ return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
+ "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING",
+ clear_os_lock, NULL);
}
postcore_initcall(debug_monitors_init);
@@ -254,7 +234,7 @@
return 0;
if (user_mode(regs)) {
- send_user_sigtrap(TRAP_HWBKPT);
+ send_user_sigtrap(TRAP_TRACE);
/*
* ptrace will disable single step unless explicitly
@@ -382,7 +362,7 @@
static int __init debug_traps_init(void)
{
hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
- TRAP_HWBKPT, "single-step handler");
+ TRAP_TRACE, "single-step handler");
hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
TRAP_BRKPT, "ptrace BRK handler");
return 0;
@@ -435,8 +415,10 @@
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
- set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
- set_regs_spsr_ss(task_pt_regs(task));
+ struct thread_info *ti = task_thread_info(task);
+
+ if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
+ set_regs_spsr_ss(task_pt_regs(task));
}
NOKPROBE_SYMBOL(user_enable_single_step);
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 0f03a8f..aef02d2 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -219,7 +219,7 @@
*
* Run ftrace_return_to_handler() before going back to parent.
* @fp is checked against the value passed by ftrace_graph_caller()
- * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
*/
ENTRY(return_to_handler)
save_return_regs
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 441420c..223d54a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -104,7 +104,7 @@
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64
str x20, [tsk, #TI_ADDR_LIMIT]
- ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+ /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
@@ -150,13 +150,7 @@
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
#ifdef CONFIG_ARM64_ERRATUM_845719
-alternative_if_not ARM64_WORKAROUND_845719
- nop
- nop
-#ifdef CONFIG_PID_IN_CONTEXTIDR
- nop
-#endif
-alternative_else
+alternative_if ARM64_WORKAROUND_845719
tbz x22, #4, 1f
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
@@ -165,7 +159,7 @@
msr contextidr_el1, xzr
#endif
1:
-alternative_endif
+alternative_else_nop_endif
#endif
.endif
msr elr_el1, x21 // set up the return data
@@ -707,18 +701,13 @@
* Ok, we need to do extra processing, enter the slow path.
*/
work_pending:
- tbnz x1, #TIF_NEED_RESCHED, work_resched
- /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
mov x0, sp // 'regs'
- enable_irq // enable interrupts for do_notify_resume()
bl do_notify_resume
- b ret_to_user
-work_resched:
#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off // the IRQs are off here, inform the tracing code
+ bl trace_hardirqs_on // enabled while in userspace
#endif
- bl schedule
-
+ ldr x1, [tsk, #TI_FLAGS] // re-check for single-step
+ b finish_ret_to_user
/*
* "slow" syscall return path.
*/
@@ -727,6 +716,7 @@
ldr x1, [tsk, #TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
+finish_ret_to_user:
enable_step_tsk x1, x2
kernel_exit 0
ENDPROC(ret_to_user)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 975b274..394c61d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -299,28 +299,16 @@
#endif /* CONFIG_CPU_PM */
#ifdef CONFIG_HOTPLUG_CPU
-static int fpsimd_cpu_hotplug_notifier(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static int fpsimd_cpu_dead(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
-
- switch (action) {
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- per_cpu(fpsimd_last_state, cpu) = NULL;
- break;
- }
- return NOTIFY_OK;
+ per_cpu(fpsimd_last_state, cpu) = NULL;
+ return 0;
}
-static struct notifier_block fpsimd_cpu_hotplug_notifier_block = {
- .notifier_call = fpsimd_cpu_hotplug_notifier,
-};
-
static inline void fpsimd_hotplug_init(void)
{
- register_cpu_notifier(&fpsimd_cpu_hotplug_notifier_block);
+ cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
+ NULL, fpsimd_cpu_dead);
}
#else
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index ebecf9a..40ad08a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -138,7 +138,7 @@
return;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer);
+ frame_pointer, NULL);
if (err == -EBUSY)
return;
else
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 3e7b050..427f6d3 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -208,13 +208,23 @@
__INIT
+ /*
+ * The following callee saved general purpose registers are used on the
+ * primary lowlevel boot path:
+ *
+ * Register Scope Purpose
+ * x21 stext() .. start_kernel() FDT pointer passed at boot in x0
+ * x23 stext() .. start_kernel() physical misalignment/KASLR offset
+ * x28 __create_page_tables() callee preserved temp register
+ * x19/x20 __primary_switch() callee preserved temp registers
+ */
ENTRY(stext)
bl preserve_boot_args
- bl el2_setup // Drop to EL1, w20=cpu_boot_mode
- adrp x24, __PHYS_OFFSET
- and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
+ bl el2_setup // Drop to EL1, w0=cpu_boot_mode
+ adrp x23, __PHYS_OFFSET
+ and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
- bl __create_page_tables // x25=TTBR0, x26=TTBR1
+ bl __create_page_tables
/*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
* details.
@@ -222,9 +232,7 @@
* the TCR will have been set.
*/
bl __cpu_setup // initialise processor
- adr_l x27, __primary_switch // address to jump to after
- // MMU has been enabled
- b __enable_mmu
+ b __primary_switch
ENDPROC(stext)
/*
@@ -311,23 +319,21 @@
* been enabled
*/
__create_page_tables:
- adrp x25, idmap_pg_dir
- adrp x26, swapper_pg_dir
mov x28, lr
/*
* Invalidate the idmap and swapper page tables to avoid potential
* dirty cache lines being evicted.
*/
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
+ adrp x0, idmap_pg_dir
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
- mov x0, x25
- add x6, x26, #SWAPPER_DIR_SIZE
+ adrp x0, idmap_pg_dir
+ adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -340,7 +346,7 @@
/*
* Create the identity mapping.
*/
- mov x0, x25 // idmap_pg_dir
+ adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
#ifndef CONFIG_ARM64_VA_BITS_48
@@ -390,7 +396,7 @@
/*
* Map the kernel image (starting with PHYS_OFFSET).
*/
- mov x0, x26 // swapper_pg_dir
+ adrp x0, swapper_pg_dir
mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
@@ -405,8 +411,8 @@
* accesses (MMU disabled), invalidate the idmap and swapper page
* tables again to remove any speculatively loaded cache lines.
*/
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
+ adrp x0, idmap_pg_dir
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
dmb sy
bl __inval_cache_range
@@ -416,14 +422,27 @@
/*
* The following fragment of code is executed with the MMU enabled.
+ *
+ * x0 = __PHYS_OFFSET
*/
- .set initial_sp, init_thread_union + THREAD_START_SP
__primary_switched:
- mov x28, lr // preserve LR
+ adrp x4, init_thread_union
+ add sp, x4, #THREAD_SIZE
+ msr sp_el0, x4 // Save thread_info
+
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
+ stp xzr, x30, [sp, #-16]!
+ mov x29, sp
+
+ str_l x21, __fdt_pointer, x5 // Save FDT pointer
+
+ ldr_l x4, kimage_vaddr // Save the offset between
+ sub x4, x4, x0 // the kernel virtual and
+ str_l x4, kimage_voffset, x5 // physical mappings
+
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
@@ -432,17 +451,6 @@
bl __pi_memset
dsb ishst // Make zero page visible to PTW
- adr_l sp, initial_sp, x4
- mov x4, sp
- and x4, x4, #~(THREAD_SIZE - 1)
- msr sp_el0, x4 // Save thread_info
- str_l x21, __fdt_pointer, x5 // Save FDT pointer
-
- ldr_l x4, kimage_vaddr // Save the offset between
- sub x4, x4, x24 // the kernel virtual and
- str_l x4, kimage_voffset, x5 // physical mappings
-
- mov x29, #0
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
@@ -454,8 +462,8 @@
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
- ret x28 // we must enable KASLR, return
- // to __enable_mmu()
+ ldp x29, x30, [sp], #16 // we must enable KASLR, return
+ ret // to __primary_switch()
0:
#endif
b start_kernel
@@ -465,7 +473,7 @@
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".text","ax"
+ .section ".idmap.text","ax"
ENTRY(kimage_vaddr)
.quad _text - TEXT_OFFSET
@@ -490,7 +498,7 @@
CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1
CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
msr sctlr_el1, x0
- mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
+ mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
@@ -586,7 +594,7 @@
cbz x2, install_el2_stub
- mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
@@ -601,7 +609,7 @@
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
- mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
ENDPROC(el2_setup)
@@ -611,27 +619,39 @@
*/
set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
- cmp w20, #BOOT_CPU_MODE_EL2
+ cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
-1: str w20, [x1] // This CPU has booted in EL1
+1: str w0, [x1] // This CPU has booted in EL1
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
/*
+ * These values are written with the MMU off, but read with the MMU on.
+ * Writers will invalidate the corresponding address, discarding up to a
+ * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
+ * sufficient alignment that the CWG doesn't overlap another section.
+ */
+ .pushsection ".mmuoff.data.write", "aw"
+/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
- .pushsection .data..cacheline_aligned
- .align L1_CACHE_SHIFT
ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+ENTRY(__early_cpu_boot_status)
+ .long 0
+
.popsection
/*
@@ -639,7 +659,7 @@
* cores are held until we're ready for them to initialise.
*/
ENTRY(secondary_holding_pen)
- bl el2_setup // Drop to EL1, w20=cpu_boot_mode
+ bl el2_setup // Drop to EL1, w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
@@ -666,12 +686,10 @@
/*
* Common entry point for secondary CPUs.
*/
- adrp x25, idmap_pg_dir
- adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
-
- adr_l x27, __secondary_switch // address to jump to after enabling the MMU
- b __enable_mmu
+ bl __enable_mmu
+ ldr x8, =__secondary_switched
+ br x8
ENDPROC(secondary_startup)
__secondary_switched:
@@ -706,33 +724,27 @@
dc ivac, \tmp1 // Invalidate potentially stale cache line
.endm
- .pushsection .data..cacheline_aligned
- .align L1_CACHE_SHIFT
-ENTRY(__early_cpu_boot_status)
- .long 0
- .popsection
-
/*
* Enable the MMU.
*
* x0 = SCTLR_EL1 value for turning on the MMU.
- * x27 = *virtual* address to jump to upon completion
*
- * Other registers depend on the function called upon completion.
+ * Returns to the caller via x30/lr. This requires the caller to be covered
+ * by the .idmap.text section.
*
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
- .section ".idmap.text", "ax"
ENTRY(__enable_mmu)
- mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
update_early_cpu_boot_status 0, x1, x2
- msr ttbr0_el1, x25 // load TTBR0
- msr ttbr1_el1, x26 // load TTBR1
+ adrp x1, idmap_pg_dir
+ adrp x2, swapper_pg_dir
+ msr ttbr0_el1, x1 // load TTBR0
+ msr ttbr1_el1, x2 // load TTBR1
isb
msr sctlr_el1, x0
isb
@@ -744,29 +756,7 @@
ic iallu
dsb nsh
isb
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x19, x0 // preserve new SCTLR_EL1 value
- blr x27
-
- /*
- * If we return here, we have a KASLR displacement in x23 which we need
- * to take into account by discarding the current kernel mapping and
- * creating a new one.
- */
- msr sctlr_el1, x22 // disable the MMU
- isb
- bl __create_page_tables // recreate kernel mapping
-
- tlbi vmalle1 // Remove any stale TLB entries
- dsb nsh
-
- msr sctlr_el1, x19 // re-enable the MMU
- isb
- ic iallu // flush instructions fetched
- dsb nsh // via old mapping
- isb
-#endif
- br x27
+ ret
ENDPROC(__enable_mmu)
__no_granule_support:
@@ -775,11 +765,11 @@
1:
wfe
wfi
- b 1b
+ b 1b
ENDPROC(__no_granule_support)
-__primary_switch:
#ifdef CONFIG_RELOCATABLE
+__relocate_kernel:
/*
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
@@ -801,14 +791,46 @@
add x13, x13, x23 // relocate
str x13, [x11, x23]
b 0b
+1: ret
+ENDPROC(__relocate_kernel)
+#endif
-1:
+__primary_switch:
+#ifdef CONFIG_RANDOMIZE_BASE
+ mov x19, x0 // preserve new SCTLR_EL1 value
+ mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
+#endif
+
+ bl __enable_mmu
+#ifdef CONFIG_RELOCATABLE
+ bl __relocate_kernel
+#ifdef CONFIG_RANDOMIZE_BASE
+ ldr x8, =__primary_switched
+ adrp x0, __PHYS_OFFSET
+ blr x8
+
+ /*
+ * If we return here, we have a KASLR displacement in x23 which we need
+ * to take into account by discarding the current kernel mapping and
+ * creating a new one.
+ */
+ msr sctlr_el1, x20 // disable the MMU
+ isb
+ bl __create_page_tables // recreate kernel mapping
+
+ tlbi vmalle1 // Remove any stale TLB entries
+ dsb nsh
+
+ msr sctlr_el1, x19 // re-enable the MMU
+ isb
+ ic iallu // flush instructions fetched
+ dsb nsh // via old mapping
+ isb
+
+ bl __relocate_kernel
+#endif
#endif
ldr x8, =__primary_switched
+ adrp x0, __PHYS_OFFSET
br x8
ENDPROC(__primary_switch)
-
-__secondary_switch:
- ldr x8, =__secondary_switched
- br x8
-ENDPROC(__secondary_switch)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 46f29b6..e56d848 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -36,8 +36,8 @@
.macro break_before_make_ttbr_switch zero_page, page_table
msr ttbr1_el1, \zero_page
isb
- tlbi vmalle1is
- dsb ish
+ tlbi vmalle1
+ dsb nsh
msr ttbr1_el1, \page_table
isb
.endm
@@ -96,7 +96,7 @@
add x1, x10, #PAGE_SIZE
/* Clean the copied page to PoU - based on flush_icache_range() */
- dcache_line_size x2, x3
+ raw_dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x10, x3
2: dc cvau, x4 /* clean D line / unified line */
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 65d81f9..d55a7b0 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -15,9 +15,9 @@
* License terms: GNU General Public License (GPL) version 2
*/
#define pr_fmt(x) "hibernate: " x
+#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
-#include <linux/notifier.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@@ -26,6 +26,7 @@
#include <asm/barrier.h>
#include <asm/cacheflush.h>
+#include <asm/cputype.h>
#include <asm/irqflags.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
@@ -34,6 +35,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
+#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
@@ -54,12 +56,6 @@
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
-/*
- * Start/end of the hibernate exit code, this must be copied to a 'safe'
- * location in memory, and executed from there.
- */
-extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
-
/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];
@@ -67,6 +63,12 @@
extern char __hyp_stub_vectors[];
/*
+ * The logical cpu number we should resume on, initialised to a non-cpu
+ * number.
+ */
+static int sleep_cpu = -EINVAL;
+
+/*
* Values that may not change over hibernate/resume. We put the build number
* and date in here so that we guarantee not to resume with a different
* kernel.
@@ -88,6 +90,8 @@
* re-configure el2.
*/
phys_addr_t __hyp_stub_vectors;
+
+ u64 sleep_cpu_mpidr;
} resume_hdr;
static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
@@ -130,12 +134,22 @@
else
hdr->__hyp_stub_vectors = 0;
+ /* Save the mpidr of the cpu we called cpu_suspend() on... */
+ if (sleep_cpu < 0) {
+ pr_err("Failing to hibernate on an unkown CPU.\n");
+ return -ENODEV;
+ }
+ hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
+ pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
+ hdr->sleep_cpu_mpidr);
+
return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_save);
int arch_hibernation_header_restore(void *addr)
{
+ int ret;
struct arch_hibernate_hdr_invariants invariants;
struct arch_hibernate_hdr *hdr = addr;
@@ -145,6 +159,24 @@
return -EINVAL;
}
+ sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
+ pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
+ hdr->sleep_cpu_mpidr);
+ if (sleep_cpu < 0) {
+ pr_crit("Hibernated on a CPU not known to this kernel!\n");
+ sleep_cpu = -EINVAL;
+ return -EINVAL;
+ }
+ if (!cpu_online(sleep_cpu)) {
+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
+ ret = cpu_up(sleep_cpu);
+ if (ret) {
+ pr_err("Failed to bring hibernate-CPU up!\n");
+ sleep_cpu = -EINVAL;
+ return ret;
+ }
+ }
+
resume_hdr = *hdr;
return 0;
@@ -241,6 +273,7 @@
return rc;
}
+#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
int swsusp_arch_suspend(void)
{
@@ -256,10 +289,16 @@
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
+ sleep_cpu = smp_processor_id();
ret = swsusp_save();
} else {
- /* Clean kernel to PoC for secondary core startup */
- __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+ /* Clean kernel core startup/idle code to PoC*/
+ dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
+ dcache_clean_range(__idmap_text_start, __idmap_text_end);
+
+ /* Clean kvm setup code to PoC? */
+ if (el2_reset_needed())
+ dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
/*
* Tell the hibernation core that we've just restored
@@ -267,6 +306,7 @@
*/
in_suspend = 0;
+ sleep_cpu = -EINVAL;
__cpu_suspend_exit();
}
@@ -275,6 +315,33 @@
return ret;
}
+static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+{
+ pte_t pte = *src_pte;
+
+ if (pte_valid(pte)) {
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_pte, pte_clear_rdonly(pte));
+ } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+ /*
+ * debug_pagealloc will removed the PTE_VALID bit if
+ * the page isn't in use by the resume kernel. It may have
+ * been in use by the original kernel, in which case we need
+ * to put it back in our copy to do the restore.
+ *
+ * Before marking this entry valid, check the pfn should
+ * be mapped.
+ */
+ BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+ }
+}
+
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
unsigned long end)
{
@@ -290,13 +357,7 @@
src_pte = pte_offset_kernel(src_pmd, start);
do {
- if (!pte_none(*src_pte))
- /*
- * Resume will overwrite areas that may be marked
- * read only (code, rodata). Clear the RDONLY bit from
- * the temporary mappings we use during restore.
- */
- set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+ _copy_pte(dst_pte, src_pte, addr);
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
return 0;
@@ -483,27 +544,12 @@
return rc;
}
-static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
- unsigned long action, void *ptr)
+int hibernate_resume_nonboot_cpu_disable(void)
{
- if (action == PM_HIBERNATION_PREPARE &&
- cpumask_first(cpu_online_mask) != 0) {
- pr_warn("CPU0 is offline.\n");
- return notifier_from_errno(-ENODEV);
+ if (sleep_cpu < 0) {
+ pr_err("Failing to resume from hibernate on an unkown CPU.\n");
+ return -ENODEV;
}
- return NOTIFY_OK;
+ return freeze_secondary_cpus(sleep_cpu);
}
-
-static int __init check_boot_cpu_online_init(void)
-{
- /*
- * Set this pm_notifier callback with a lower priority than
- * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
- * called earlier to disable cpu hotplug before the cpu online check.
- */
- pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
-
- return 0;
-}
-core_initcall(check_boot_cpu_online_init);
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 26a6bf7..948b731 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -857,7 +857,7 @@
/*
* CPU initialisation.
*/
-static void hw_breakpoint_reset(void *unused)
+static int hw_breakpoint_reset(unsigned int cpu)
{
int i;
struct perf_event **slots;
@@ -888,28 +888,14 @@
write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
}
}
-}
-static int hw_breakpoint_reset_notify(struct notifier_block *self,
- unsigned long action,
- void *hcpu)
-{
- if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) {
- local_irq_disable();
- hw_breakpoint_reset(NULL);
- local_irq_enable();
- }
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block hw_breakpoint_reset_nb = {
- .notifier_call = hw_breakpoint_reset_notify,
-};
-
#ifdef CONFIG_CPU_PM
-extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *));
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
#else
-static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
{
}
#endif
@@ -919,36 +905,34 @@
*/
static int __init arch_hw_breakpoint_init(void)
{
+ int ret;
+
core_num_brps = get_num_brps();
core_num_wrps = get_num_wrps();
pr_info("found %d breakpoint and %d watchpoint registers.\n",
core_num_brps, core_num_wrps);
- cpu_notifier_register_begin();
-
- /*
- * Reset the breakpoint resources. We assume that a halting
- * debugger will leave the world in a nice state for us.
- */
- smp_call_function(hw_breakpoint_reset, NULL, 1);
- hw_breakpoint_reset(NULL);
-
/* Register debug fault handlers. */
hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
TRAP_HWBKPT, "hw-breakpoint handler");
hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
TRAP_HWBKPT, "hw-watchpoint handler");
- /* Register hotplug notifier. */
- __register_cpu_notifier(&hw_breakpoint_reset_nb);
-
- cpu_notifier_register_done();
+ /*
+ * Reset the breakpoint resources. We assume that a halting
+ * debugger will leave the world in a nice state for us.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
+ "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING",
+ hw_breakpoint_reset, NULL);
+ if (ret)
+ pr_err("failed to register CPU hotplug notifier: %d\n", ret);
/* Register cpu_suspend hw breakpoint restore hook */
cpu_suspend_set_dbg_restorer(hw_breakpoint_reset);
- return 0;
+ return ret;
}
arch_initcall(arch_hw_breakpoint_init);
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 63f9432..6f2ac4f 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@
if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
page = vmalloc_to_page(addr);
- else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+ else if (!module)
page = pfn_to_page(PHYS_PFN(__pa(addr)));
else
return addr;
@@ -1202,6 +1202,19 @@
BUG();
}
+s32 aarch64_insn_adrp_get_offset(u32 insn)
+{
+ BUG_ON(!aarch64_insn_is_adrp(insn));
+ return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12;
+}
+
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset)
+{
+ BUG_ON(!aarch64_insn_is_adrp(insn));
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn,
+ offset >> 12);
+}
+
/*
* Extract the Op/CR data from a msr/mrs instruction.
*/
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index b054691..769f24e 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -6,6 +6,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/cache.h>
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/libfdt.h>
@@ -20,7 +21,7 @@
#include <asm/pgtable.h>
#include <asm/sections.h>
-u64 __read_mostly module_alloc_base;
+u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
static __init u64 get_kaslr_seed(void *fdt)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 8c57f64..e017a94 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -19,10 +19,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bug.h>
#include <linux/irq.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
#include <linux/kprobes.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -338,15 +341,24 @@
unregister_die_notifier(&kgdb_notifier);
}
-/*
- * ARM instructions are always in LE.
- * Break instruction is encoded in LE format
- */
-struct kgdb_arch arch_kgdb_ops = {
- .gdb_bpt_instr = {
- KGDB_DYN_BRK_INS_BYTE(0),
- KGDB_DYN_BRK_INS_BYTE(1),
- KGDB_DYN_BRK_INS_BYTE(2),
- KGDB_DYN_BRK_INS_BYTE(3),
- }
-};
+struct kgdb_arch arch_kgdb_ops;
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+
+ BUILD_BUG_ON(AARCH64_INSN_SIZE != BREAK_INSTR_SIZE);
+
+ err = aarch64_insn_read((void *)bpt->bpt_addr, (u32 *)bpt->saved_instr);
+ if (err)
+ return err;
+
+ return aarch64_insn_write((void *)bpt->bpt_addr,
+ (u32)AARCH64_BREAK_KGDB_DYN_DBG);
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ return aarch64_insn_write((void *)bpt->bpt_addr,
+ *(u32 *)bpt->saved_instr);
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 838ccf1..a9310a6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -24,6 +24,7 @@
#include <asm/sysreg.h>
#include <asm/virt.h>
+#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -190,13 +191,23 @@
#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED
/* PMUv3 HW events mapping. */
+
+/*
+ * ARMv8 Architectural defined events, not all of these may
+ * be supported on any given implementation. Undefined events will
+ * be disabled at run-time.
+ */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};
/* ARM Cortex-A53 HW events mapping. */
@@ -258,6 +269,15 @@
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
@@ -523,12 +543,6 @@
.attrs = armv8_pmuv3_format_attrs,
};
-static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
- &armv8_pmuv3_events_attr_group,
- &armv8_pmuv3_format_attr_group,
- NULL,
-};
-
/*
* Perf Events' indices
*/
@@ -905,9 +919,22 @@
static int armv8_pmuv3_map_event(struct perf_event *event)
{
- return armpmu_map_event(event, &armv8_pmuv3_perf_map,
- &armv8_pmuv3_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
+ int hw_event_id;
+ struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+ hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
+ &armv8_pmuv3_perf_cache_map,
+ ARMV8_PMU_EVTYPE_EVENT);
+ if (hw_event_id < 0)
+ return hw_event_id;
+
+ /* disable micro/arch events not supported by this PMU */
+ if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) &&
+ !test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+ return -EOPNOTSUPP;
+ }
+
+ return hw_event_id;
}
static int armv8_a53_map_event(struct perf_event *event)
@@ -985,7 +1012,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_pmuv3";
cpu_pmu->map_event = armv8_pmuv3_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -994,7 +1024,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cortex_a53";
cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -1003,7 +1036,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cortex_a57";
cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -1012,7 +1048,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cortex_a72";
cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -1021,7 +1060,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cavium_thunder";
cpu_pmu->map_event = armv8_thunder_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -1030,7 +1072,10 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_brcm_vulcan";
cpu_pmu->map_event = armv8_vulcan_map_event;
- cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
return armv8pmu_probe_pmu(cpu_pmu);
}
@@ -1044,21 +1089,32 @@
{},
};
+/*
+ * Non DT systems have their micro/arch events probed at run-time.
+ * A fairly complete list of generic events are provided and ones that
+ * aren't supported by the current PMU are disabled.
+ */
+static const struct pmu_probe_info armv8_pmu_probe_table[] = {
+ PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */
+ { /* sentinel value */ }
+};
+
static int armv8_pmu_device_probe(struct platform_device *pdev)
{
- return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+ if (acpi_disabled)
+ return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
+ NULL);
+
+ return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
+ armv8_pmu_probe_table);
}
static struct platform_driver armv8_pmu_driver = {
.driver = {
- .name = "armv8-pmu",
+ .name = ARMV8_PMU_PDEV_NAME,
.of_match_table = armv8_pmu_of_device_ids,
},
.probe = armv8_pmu_device_probe,
};
-static int __init register_armv8_pmu_driver(void)
-{
- return platform_driver_register(&armv8_pmu_driver);
-}
-device_initcall(register_armv8_pmu_driver);
+builtin_platform_driver(armv8_pmu_driver);
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 37e47a9..d1731bf 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -16,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/module.h>
+#include <linux/kallsyms.h>
#include <asm/kprobes.h>
#include <asm/insn.h>
#include <asm/sections.h>
@@ -122,7 +123,7 @@
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
- while (scan_start > scan_end) {
+ while (scan_start >= scan_end) {
/*
* atomic region starts from exclusive load and ends with
* exclusive store.
@@ -142,33 +143,30 @@
{
enum kprobe_insn decoded;
kprobe_opcode_t insn = le32_to_cpu(*addr);
- kprobe_opcode_t *scan_start = addr - 1;
- kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
-#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
- struct module *mod;
-#endif
+ kprobe_opcode_t *scan_end = NULL;
+ unsigned long size = 0, offset = 0;
- if (addr >= (kprobe_opcode_t *)_text &&
- scan_end < (kprobe_opcode_t *)_text)
- scan_end = (kprobe_opcode_t *)_text;
-#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
- else {
- preempt_disable();
- mod = __module_address((unsigned long)addr);
- if (mod && within_module_init((unsigned long)addr, mod) &&
- !within_module_init((unsigned long)scan_end, mod))
- scan_end = (kprobe_opcode_t *)mod->init_layout.base;
- else if (mod && within_module_core((unsigned long)addr, mod) &&
- !within_module_core((unsigned long)scan_end, mod))
- scan_end = (kprobe_opcode_t *)mod->core_layout.base;
- preempt_enable();
+ /*
+ * If there's a symbol defined in front of and near enough to
+ * the probe address assume it is the entry point to this
+ * code and use it to further limit how far back we search
+ * when determining if we're in an atomic sequence. If we could
+ * not find any symbol skip the atomic test altogether as we
+ * could otherwise end up searching irrelevant text/literals.
+ * KPROBES depends on KALLSYMS so this last case should never
+ * happen.
+ */
+ if (kallsyms_lookup_size_offset((unsigned long) addr, &size, &offset)) {
+ if (offset < (MAX_ATOMIC_CONTEXT_SIZE*sizeof(kprobe_opcode_t)))
+ scan_end = addr - (offset / sizeof(kprobe_opcode_t));
+ else
+ scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
-#endif
decoded = arm_probe_decode_insn(insn, asi);
- if (decoded == INSN_REJECTED ||
- is_probed_address_atomic(scan_start, scan_end))
- return INSN_REJECTED;
+ if (decoded != INSN_REJECTED && scan_end)
+ if (is_probed_address_atomic(addr - 1, scan_end))
+ return INSN_REJECTED;
return decoded;
}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c6b0f40..f5077ea 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -19,7 +19,7 @@
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/stringify.h>
@@ -31,7 +31,7 @@
#include <asm/insn.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
-#include <asm-generic/sections.h>
+#include <asm/sections.h>
#include "decode-insn.h"
@@ -166,13 +166,18 @@
}
/*
- * The D-flag (Debug mask) is set (masked) upon debug exception entry.
- * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive
- * probe i.e. when probe hit from kprobe handler context upon
- * executing the pre/post handlers. In this case we return with
- * D-flag clear so that single-stepping can be carried-out.
- *
- * Leave D-flag set in all other cases.
+ * When PSTATE.D is set (masked), then software step exceptions can not be
+ * generated.
+ * SPSR's D bit shows the value of PSTATE.D immediately before the
+ * exception was taken. PSTATE.D is set while entering into any exception
+ * mode, however software clears it for any normal (none-debug-exception)
+ * mode in the exception entry. Therefore, when we are entering into kprobe
+ * breakpoint handler from any normal mode then SPSR.D bit is already
+ * cleared, however it is set when we are entering from any debug exception
+ * mode.
+ * Since we always need to generate single step exception after a kprobe
+ * breakpoint exception therefore we need to clear it unconditionally, when
+ * we become sure that the current breakpoint exception is for kprobe.
*/
static void __kprobes
spsr_set_debug_flag(struct pt_regs *regs, int mask)
@@ -245,10 +250,7 @@
set_ss_context(kcb, slot); /* mark pending ss */
- if (kcb->kprobe_status == KPROBE_REENTER)
- spsr_set_debug_flag(regs, 0);
- else
- WARN_ON(regs->pstate & PSR_D_BIT);
+ spsr_set_debug_flag(regs, 0);
/* IRQs and single stepping do not mix well. */
kprobes_save_local_irqflag(kcb, regs);
@@ -333,8 +335,6 @@
BUG();
kernel_disable_single_step();
- if (kcb->kprobe_status == KPROBE_REENTER)
- spsr_set_debug_flag(regs, 1);
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
@@ -457,9 +457,6 @@
kprobes_restore_local_irqflag(kcb, regs);
kernel_disable_single_step();
- if (kcb->kprobe_status == KPROBE_REENTER)
- spsr_set_debug_flag(regs, 1);
-
post_kprobe_handler(kcb, regs);
}
@@ -543,9 +540,6 @@
bool arch_within_kprobe_blacklist(unsigned long addr)
{
- extern char __idmap_text_start[], __idmap_text_end[];
- extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
if ((addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6cd2612..a4f5f76 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -202,7 +202,7 @@
static void tls_thread_flush(void)
{
- asm ("msr tpidr_el0, xzr");
+ write_sysreg(0, tpidr_el0);
if (is_compat_task()) {
current->thread.tp_value = 0;
@@ -213,7 +213,7 @@
* with a stale shadow state during context switch.
*/
barrier();
- asm ("msr tpidrro_el0, xzr");
+ write_sysreg(0, tpidrro_el0);
}
}
@@ -253,7 +253,7 @@
* Read the current TLS pointer from tpidr_el0 as it may be
* out-of-sync with the saved value.
*/
- asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p)));
+ *task_user_tls(p) = read_sysreg(tpidr_el0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
@@ -289,17 +289,15 @@
{
unsigned long tpidr, tpidrro;
- asm("mrs %0, tpidr_el0" : "=r" (tpidr));
+ tpidr = read_sysreg(tpidr_el0);
*task_user_tls(current) = tpidr;
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
next->thread.tp_value : 0;
- asm(
- " msr tpidr_el0, %0\n"
- " msr tpidrro_el0, %1"
- : : "r" (tpidr), "r" (tpidrro));
+ write_sysreg(tpidr, tpidr_el0);
+ write_sysreg(tpidrro, tpidrro_el0);
}
/* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 51b73cd..ce704a4 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -34,7 +34,7 @@
/* Setup the list loop variables. */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
- dcache_line_size x15, x0 /* x15 = dcache line size */
+ raw_dcache_line_size x15, x0 /* x15 = dcache line size */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 536dce2..f534f49 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -206,10 +206,15 @@
for_each_memblock(memory, region) {
res = alloc_bootmem_low(sizeof(*res));
- res->name = "System RAM";
+ if (memblock_is_nomap(region)) {
+ res->name = "reserved";
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ }
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
@@ -228,7 +233,7 @@
{
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
- sprintf(init_utsname()->machine, ELF_PLATFORM);
+ sprintf(init_utsname()->machine, UTS_MACHINE);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8eafdb..404dd67 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -402,15 +402,31 @@
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned int thread_flags)
{
- if (thread_flags & _TIF_SIGPENDING)
- do_signal(regs);
+ /*
+ * The assembly code enters us with IRQs off, but it hasn't
+ * informed the tracing code of that for efficiency reasons.
+ * Update the trace code with the current status.
+ */
+ trace_hardirqs_off();
+ do {
+ if (thread_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ local_irq_enable();
- if (thread_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
+ if (thread_flags & _TIF_SIGPENDING)
+ do_signal(regs);
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
+ if (thread_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+ if (thread_flags & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+ }
+
+ local_irq_disable();
+ thread_flags = READ_ONCE(current_thread_info()->flags);
+ } while (thread_flags & _TIF_WORK_MASK);
}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index ccf79d8..b8799e7 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -73,10 +73,9 @@
str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
/* find the mpidr_hash */
- ldr x1, =sleep_save_stash
- ldr x1, [x1]
+ ldr_l x1, sleep_save_stash
mrs x7, mpidr_el1
- ldr x9, =mpidr_hash
+ adr_l x9, mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
/*
* Following code relies on the struct mpidr_hash
@@ -95,36 +94,30 @@
mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
- .ltorg
+ .pushsection ".idmap.text", "ax"
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
+ bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
- adr_l lr, __enable_mmu /* __cpu_setup will return here */
- adr_l x27, _resume_switched /* __enable_mmu will branch here */
- adrp x25, idmap_pg_dir
- adrp x26, swapper_pg_dir
- b __cpu_setup
-ENDPROC(cpu_resume)
-
- .pushsection ".idmap.text", "ax"
-_resume_switched:
+ bl __enable_mmu
ldr x8, =_cpu_resume
br x8
-ENDPROC(_resume_switched)
+ENDPROC(cpu_resume)
.ltorg
.popsection
ENTRY(_cpu_resume)
mrs x1, mpidr_el1
- adrp x8, mpidr_hash
- add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
- /* retrieve mpidr_hash members to compute the hash */
+ adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address
+
+ /* retrieve mpidr_hash members to compute the hash */
ldr x2, [x8, #MPIDR_HASH_MASK]
ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS]
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
- /* x7 contains hash index, let's use it to grab context pointer */
+
+ /* x7 contains hash index, let's use it to grab context pointer */
ldr_l x0, sleep_save_stash
ldr x0, [x0, x7, lsl #3]
add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d93d433..d3f151c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -201,12 +201,6 @@
return ret;
}
-static void smp_store_cpu_info(unsigned int cpuid)
-{
- store_cpu_topology(cpuid);
- numa_store_cpu_info(cpuid);
-}
-
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
@@ -239,7 +233,7 @@
* this CPU ticks all of those. If it doesn't, the CPU will
* fail to come online.
*/
- verify_local_cpu_capabilities();
+ check_local_cpu_capabilities();
if (cpu_ops[cpu]->cpu_postboot)
cpu_ops[cpu]->cpu_postboot();
@@ -254,7 +248,7 @@
*/
notify_cpu_starting(cpu);
- smp_store_cpu_info(cpu);
+ store_cpu_topology(cpu);
/*
* OK, now it's safe to let the boot CPU continue. Wait for
@@ -437,8 +431,19 @@
void __init smp_prepare_boot_cpu(void)
{
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ /*
+ * Initialise the static keys early as they may be enabled by the
+ * cpufeature code.
+ */
+ jump_label_init();
cpuinfo_store_boot_cpu();
save_boot_cpu_run_el();
+ /*
+ * Run the errata work around checks on the boot CPU, once we have
+ * initialised the cpu feature infrastructure from
+ * cpuinfo_store_boot_cpu() above.
+ */
+ update_cpu_errata_workarounds();
}
static u64 __init of_get_cpu_mpidr(struct device_node *dn)
@@ -619,6 +624,7 @@
}
bootcpu_valid = true;
+ early_map_cpu_to_node(0, of_node_to_nid(dn));
/*
* cpu_logical_map has already been
@@ -689,10 +695,13 @@
{
int err;
unsigned int cpu;
+ unsigned int this_cpu;
init_cpu_topology();
- smp_store_cpu_info(smp_processor_id());
+ this_cpu = smp_processor_id();
+ store_cpu_topology(this_cpu);
+ numa_store_cpu_info(this_cpu);
/*
* If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
@@ -719,6 +728,7 @@
continue;
set_cpu_present(cpu, true);
+ numa_store_cpu_info(cpu);
}
}
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 18a71bc..9a00eee 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -29,7 +29,8 @@
#include <asm/smp_plat.h>
extern void secondary_holding_pen(void);
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+volatile unsigned long __section(".mmuoff.data.read")
+secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d9751a4..c2efddf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -43,6 +43,9 @@
unsigned long fp = frame->fp;
unsigned long irq_stack_ptr;
+ if (!tsk)
+ tsk = current;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -67,7 +70,7 @@
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (tsk && tsk->ret_stack &&
+ if (tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
/*
* This is a case where function graph tracer has
@@ -152,6 +155,27 @@
return trace->nr_entries >= trace->max_entries;
}
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ struct stack_trace_data data;
+ struct stackframe frame;
+
+ data.trace = trace;
+ data.skip = trace->skip;
+ data.no_sched_functions = 0;
+
+ frame.fp = regs->regs[29];
+ frame.sp = regs->sp;
+ frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ frame.graph = current->curr_ret_stack;
+#endif
+
+ walk_stackframe(current, &frame, save_trace, &data);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
struct stack_trace_data data;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index b616e36..ad73414 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -23,8 +23,8 @@
* time the notifier runs debug exceptions might have been enabled already,
* with HW breakpoints registers content still in an unknown state.
*/
-static void (*hw_breakpoint_restore)(void *);
-void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+static int (*hw_breakpoint_restore)(unsigned int);
+void __init cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
{
/* Prevent multiple restore hook initializations */
if (WARN_ON(hw_breakpoint_restore))
@@ -34,6 +34,8 @@
void notrace __cpu_suspend_exit(void)
{
+ unsigned int cpu = smp_processor_id();
+
/*
* We are resuming from reset with the idmap active in TTBR0_EL1.
* We must uninstall the idmap and restore the expected MMU
@@ -45,7 +47,7 @@
* Restore per-cpu offset before any kernel
* subsystem relying on it has a chance to run.
*/
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ set_my_cpu_offset(per_cpu_offset(cpu));
/*
* Restore HW breakpoint registers to sane values
@@ -53,7 +55,7 @@
* through local_dbg_restore.
*/
if (hw_breakpoint_restore)
- hw_breakpoint_restore(NULL);
+ hw_breakpoint_restore(cpu);
}
/*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 28c511b..abaf582 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -94,7 +94,7 @@
* See comment in tls_thread_flush.
*/
barrier();
- asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0]));
+ write_sysreg(regs->regs[0], tpidrro_el0);
return 0;
default:
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index e04f838..5ff020f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -142,6 +142,11 @@
unsigned long irq_stack_ptr;
int skip;
+ pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+ if (!tsk)
+ tsk = current;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -151,11 +156,6 @@
else
irq_stack_ptr = 0;
- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
-
- if (!tsk)
- tsk = current;
-
if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_stack_pointer;
@@ -447,36 +447,29 @@
: "=r" (res) \
: "r" (address), "i" (-EFAULT) )
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
{
unsigned long address;
- int ret;
+ int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+ int ret = 0;
- /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */
- if ((esr & 0x01fffc01) == 0x0012dc00) {
- int rt = (esr >> 5) & 0x1f;
- int crm = (esr >> 1) & 0x0f;
+ address = (rt == 31) ? 0 : regs->regs[rt];
- address = (rt == 31) ? 0 : regs->regs[rt];
-
- switch (crm) {
- case 11: /* DC CVAU, gets promoted */
- __user_cache_maint("dc civac", address, ret);
- break;
- case 10: /* DC CVAC, gets promoted */
- __user_cache_maint("dc civac", address, ret);
- break;
- case 14: /* DC CIVAC */
- __user_cache_maint("dc civac", address, ret);
- break;
- case 5: /* IC IVAU */
- __user_cache_maint("ic ivau", address, ret);
- break;
- default:
- force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
- return;
- }
- } else {
+ switch (crm) {
+ case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case ESR_ELx_SYS64_ISS_CRM_IC_IVAU: /* IC IVAU */
+ __user_cache_maint("ic ivau", address, ret);
+ break;
+ default:
force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
return;
}
@@ -487,6 +480,48 @@
regs->pc += 4;
}
+static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+ int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+ regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val;
+ regs->pc += 4;
+}
+
+struct sys64_hook {
+ unsigned int esr_mask;
+ unsigned int esr_val;
+ void (*handler)(unsigned int esr, struct pt_regs *regs);
+};
+
+static struct sys64_hook sys64_hooks[] = {
+ {
+ .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL,
+ .handler = user_cache_maint_handler,
+ },
+ {
+ /* Trap read access to CTR_EL0 */
+ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ,
+ .handler = ctr_read_handler,
+ },
+ {},
+};
+
+asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+{
+ struct sys64_hook *hook;
+
+ for (hook = sys64_hooks; hook->handler; hook++)
+ if ((hook->esr_mask & esr) == hook->esr_val) {
+ hook->handler(esr, regs);
+ return;
+ }
+
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+}
+
long compat_arm_syscall(struct pt_regs *regs);
asmlinkage long do_ni_syscall(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 076312b..a2c2478 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,12 +18,13 @@
* Author: Will Deacon <will.deacon@arm.com>
*/
-#include <linux/kernel.h>
+#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
+#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
@@ -37,8 +38,7 @@
#include <asm/vdso_datapage.h>
extern char vdso_start, vdso_end;
-static unsigned long vdso_pages;
-static struct page **vdso_pagelist;
+static unsigned long vdso_pages __ro_after_init;
/*
* The vDSO data page.
@@ -53,9 +53,9 @@
/*
* Create and map the vectors page for AArch32 tasks.
*/
-static struct page *vectors_page[1];
+static struct page *vectors_page[1] __ro_after_init;
-static int alloc_vectors_page(void)
+static int __init alloc_vectors_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
@@ -88,7 +88,7 @@
{
struct mm_struct *mm = current->mm;
unsigned long addr = AARCH32_VECTORS_BASE;
- static struct vm_special_mapping spec = {
+ static const struct vm_special_mapping spec = {
.name = "[vectors]",
.pages = vectors_page,
@@ -110,11 +110,19 @@
}
#endif /* CONFIG_COMPAT */
-static struct vm_special_mapping vdso_spec[2];
+static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+ {
+ .name = "[vvar]",
+ },
+ {
+ .name = "[vdso]",
+ },
+};
static int __init vdso_init(void)
{
int i;
+ struct page **vdso_pagelist;
if (memcmp(&vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
@@ -138,16 +146,8 @@
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
- /* Populate the special mapping structures */
- vdso_spec[0] = (struct vm_special_mapping) {
- .name = "[vvar]",
- .pages = vdso_pagelist,
- };
-
- vdso_spec[1] = (struct vm_special_mapping) {
- .name = "[vdso]",
- .pages = &vdso_pagelist[1],
- };
+ vdso_spec[0].pages = &vdso_pagelist[0];
+ vdso_spec[1].pages = &vdso_pagelist[1];
return 0;
}
@@ -201,7 +201,7 @@
*/
void update_vsyscall(struct timekeeper *tk)
{
- u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
++vdso_data->tb_seq_count;
smp_wmb();
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 659963d..5ce9b29 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -185,6 +185,25 @@
_data = .;
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+ /*
+ * Data written with the MMU off but read with the MMU on requires
+ * cache lines to be invalidated, discarding up to a Cache Writeback
+ * Granule (CWG) of data from the cache. Keep the section that
+ * requires this type of maintenance to be in its own Cache Writeback
+ * Granule (CWG) area so the cache maintenance operations don't
+ * interfere with adjacent data.
+ */
+ .mmuoff.data.write : ALIGN(SZ_2K) {
+ __mmuoff_data_start = .;
+ *(.mmuoff.data.write)
+ }
+ . = ALIGN(SZ_2K);
+ .mmuoff.data.read : {
+ *(.mmuoff.data.read)
+ __mmuoff_data_end = .;
+ }
+
PECOFF_EDATA_PADDING
_edata = .;
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 7ce9315..2726635 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -46,10 +46,6 @@
hvc #0
ldr lr, [sp], #16
ret
-alternative_else
+alternative_else_nop_endif
b __vhe_hyp_call
- nop
- nop
- nop
-alternative_endif
ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e51367d..f302fdb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -36,6 +36,7 @@
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
+#include <asm/sysreg.h>
#include <trace/events/kvm.h>
@@ -67,11 +68,9 @@
/* Make sure noone else changes CSSELR during this! */
local_irq_disable();
- /* Put value into CSSELR */
- asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+ write_sysreg(csselr, csselr_el1);
isb();
- /* Read result out of CCSIDR */
- asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+ ccsidr = read_sysreg(ccsidr_el1);
local_irq_enable();
return ccsidr;
@@ -174,9 +173,7 @@
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
- u32 val;
- asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
- p->regval = val;
+ p->regval = read_sysreg(dbgauthstatus_el1);
return true;
}
}
@@ -429,10 +426,7 @@
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- u64 amair;
-
- asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
- vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+ vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -456,8 +450,9 @@
{
u64 pmcr, val;
- asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
- /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN
+ pmcr = read_sysreg(pmcr_el0);
+ /*
+ * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN
* except PMCR.E resetting to zero.
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
@@ -557,9 +552,9 @@
return false;
if (!(p->Op2 & 1))
- asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
+ pmceid = read_sysreg(pmceid0_el0);
else
- asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
+ pmceid = read_sysreg(pmceid1_el0);
p->regval = pmceid;
@@ -1833,11 +1828,7 @@
static void get_##reg(struct kvm_vcpu *v, \
const struct sys_reg_desc *r) \
{ \
- u64 val; \
- \
- asm volatile("mrs %0, " __stringify(reg) "\n" \
- : "=r" (val)); \
- ((struct sys_reg_desc *)r)->val = val; \
+ ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
}
FUNCTION_INVARIANT(midr_el1)
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index ed90578..46af718 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -26,6 +26,7 @@
#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/sysreg.h>
#include <linux/init.h>
#include "sys_regs.h"
@@ -43,10 +44,7 @@
static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- u64 actlr;
-
- asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
- vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+ vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
}
/*
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 4c1e700..c3cd65e 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -29,14 +29,11 @@
* x1 - src
*/
ENTRY(copy_page)
-alternative_if_not ARM64_HAS_NO_HW_PREFETCH
- nop
- nop
-alternative_else
+alternative_if ARM64_HAS_NO_HW_PREFETCH
# Prefetch two cache lines ahead.
prfm pldl1strm, [x1, #128]
prfm pldl1strm, [x1, #256]
-alternative_endif
+alternative_else_nop_endif
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
@@ -52,11 +49,9 @@
1:
subs x18, x18, #128
-alternative_if_not ARM64_HAS_NO_HW_PREFETCH
- nop
-alternative_else
+alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
-alternative_endif
+alternative_else_nop_endif
stnp x2, x3, [x0]
ldp x2, x3, [x1]
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 07d7352..58b5a90 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -105,19 +105,20 @@
ENDPROC(__clean_dcache_area_pou)
/*
+ * __dma_inv_area(start, size)
+ * - start - virtual start address of region
+ * - size - size in question
+ */
+__dma_inv_area:
+ add x1, x1, x0
+ /* FALLTHROUGH */
+
+/*
* __inval_cache_range(start, end)
* - start - start address of region
* - end - end address of region
*/
ENTRY(__inval_cache_range)
- /* FALLTHROUGH */
-
-/*
- * __dma_inv_range(start, end)
- * - start - virtual start address of region
- * - end - virtual end address of region
- */
-__dma_inv_range:
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -136,46 +137,43 @@
dsb sy
ret
ENDPIPROC(__inval_cache_range)
-ENDPROC(__dma_inv_range)
+ENDPROC(__dma_inv_area)
/*
- * __dma_clean_range(start, end)
- * - start - virtual start address of region
- * - end - virtual end address of region
+ * __clean_dcache_area_poc(kaddr, size)
+ *
+ * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * are cleaned to the PoC.
+ *
+ * - kaddr - kernel address
+ * - size - size in question
*/
-__dma_clean_range:
- dcache_line_size x2, x3
- sub x3, x2, #1
- bic x0, x0, x3
-1:
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc cvac, x0
-alternative_else
- dc civac, x0
-alternative_endif
- add x0, x0, x2
- cmp x0, x1
- b.lo 1b
- dsb sy
- ret
-ENDPROC(__dma_clean_range)
+ENTRY(__clean_dcache_area_poc)
+ /* FALLTHROUGH */
/*
- * __dma_flush_range(start, end)
+ * __dma_clean_area(start, size)
* - start - virtual start address of region
- * - end - virtual end address of region
+ * - size - size in question
*/
-ENTRY(__dma_flush_range)
- dcache_line_size x2, x3
- sub x3, x2, #1
- bic x0, x0, x3
-1: dc civac, x0 // clean & invalidate D / U line
- add x0, x0, x2
- cmp x0, x1
- b.lo 1b
- dsb sy
+__dma_clean_area:
+ dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
-ENDPIPROC(__dma_flush_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)
+
+/*
+ * __dma_flush_area(start, size)
+ *
+ * clean & invalidate D / U line
+ *
+ * - start - virtual start address of region
+ * - size - size in question
+ */
+ENTRY(__dma_flush_area)
+ dcache_by_line_op civac, sy, x0, x1, x2, x3
+ ret
+ENDPIPROC(__dma_flush_area)
/*
* __dma_map_area(start, size, dir)
@@ -184,10 +182,9 @@
* - dir - DMA direction
*/
ENTRY(__dma_map_area)
- add x1, x1, x0
cmp w2, #DMA_FROM_DEVICE
- b.eq __dma_inv_range
- b __dma_clean_range
+ b.eq __dma_inv_area
+ b __dma_clean_area
ENDPIPROC(__dma_map_area)
/*
@@ -197,8 +194,7 @@
* - dir - DMA direction
*/
ENTRY(__dma_unmap_area)
- add x1, x1, x0
cmp w2, #DMA_TO_DEVICE
- b.ne __dma_inv_range
+ b.ne __dma_inv_area
ret
ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c4284c4..bdacead 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -20,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
@@ -30,7 +31,7 @@
#include <asm/cacheflush.h>
-static int swiotlb __read_mostly;
+static int swiotlb __ro_after_init;
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
bool coherent)
@@ -168,7 +169,7 @@
return ptr;
/* remove any dirty cache lines on the kernel alias */
- __dma_flush_range(ptr, ptr + size);
+ __dma_flush_area(ptr, size);
/* create a coherent mapping */
page = virt_to_page(ptr);
@@ -387,7 +388,7 @@
void *page_addr = page_address(page);
memset(page_addr, 0, atomic_pool_size);
- __dma_flush_range(page_addr, page_addr + atomic_pool_size);
+ __dma_flush_area(page_addr, atomic_pool_size);
atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
if (!atomic_pool)
@@ -548,7 +549,7 @@
/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
- __dma_flush_range(virt, virt + PAGE_SIZE);
+ __dma_flush_area(virt, PAGE_SIZE);
}
static void *__iommu_alloc_attrs(struct device *dev, size_t size,
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 81acd47..c9f118c 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -2,7 +2,7 @@
* Based on arch/arm/mm/extable.c
*/
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/uaccess.h>
int fixup_exception(struct pt_regs *regs)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 05d2bd7..53d9159 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -18,7 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
@@ -251,8 +251,7 @@
good_area:
/*
* Check that the permissions on the VMA allow for the fault which
- * occurred. If we encountered a write or exec fault, we must have
- * appropriate permissions, otherwise we allow any permission.
+ * occurred.
*/
if (!(vma->vm_flags & vm_flags)) {
fault = VM_FAULT_BADACCESS;
@@ -288,7 +287,7 @@
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
- unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+ unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, esr))
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 43a76b0..8377329 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -25,8 +25,6 @@
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
-#include "mm.h"
-
void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index bbb7ee7..21c489b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/bootmem.h>
+#include <linux/cache.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
@@ -34,6 +35,7 @@
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
+#include <linux/vmalloc.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -47,16 +49,14 @@
#include <asm/tlb.h>
#include <asm/alternative.h>
-#include "mm.h"
-
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
* executes, which assigns it its actual value. So use a default value
* that cannot be mistaken for a real physical address.
*/
-s64 memstart_addr __read_mostly = -1;
-phys_addr_t arm64_dma_phys_limit __read_mostly;
+s64 memstart_addr __ro_after_init = -1;
+phys_addr_t arm64_dma_phys_limit __ro_after_init;
#ifdef CONFIG_BLK_DEV_INITRD
static int __init early_initrd(char *p)
@@ -485,7 +485,12 @@
{
free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
0, "unused kernel");
- fixup_init();
+ /*
+ * Unmap the __init region but leave the VM area in place. This
+ * prevents the region from being reused for kernel modules, which
+ * is not supported by kallsyms.
+ */
+ unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
deleted file mode 100644
index 71fe9898..0000000
--- a/arch/arm64/mm/mm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4989948..05615a3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/cache.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -42,11 +43,9 @@
#include <asm/memblock.h>
#include <asm/mmu_context.h>
-#include "mm.h"
-
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
-u64 kimage_voffset __read_mostly;
+u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
/*
@@ -399,16 +398,6 @@
section_size, PAGE_KERNEL_RO);
}
-void fixup_init(void)
-{
- /*
- * Unmap the __init region but leave the VM area in place. This
- * prevents the region from being reused for kernel modules, which
- * is not supported by kallsyms.
- */
- unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
-}
-
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma)
{
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 5bb15ea..778a985 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -17,6 +17,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define pr_fmt(fmt) "NUMA: " fmt
+
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
@@ -24,6 +26,7 @@
#include <linux/of.h>
#include <asm/acpi.h>
+#include <asm/sections.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -38,10 +41,9 @@
{
if (!opt)
return -EINVAL;
- if (!strncmp(opt, "off", 3)) {
- pr_info("%s\n", "NUMA turned off");
+ if (!strncmp(opt, "off", 3))
numa_off = true;
- }
+
return 0;
}
early_param("numa", numa_parse_early_param);
@@ -93,7 +95,6 @@
*/
static void __init setup_node_to_cpumask_map(void)
{
- unsigned int cpu;
int node;
/* setup nr_node_ids if not done yet */
@@ -106,11 +107,8 @@
cpumask_clear(node_to_cpumask_map[node]);
}
- for_each_possible_cpu(cpu)
- set_cpu_numa_node(cpu, NUMA_NO_NODE);
-
/* cpumask_of_node() will now work */
- pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
+ pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
/*
@@ -118,18 +116,77 @@
*/
void numa_store_cpu_info(unsigned int cpu)
{
- map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
+ map_cpu_to_node(cpu, cpu_to_node_map[cpu]);
}
void __init early_map_cpu_to_node(unsigned int cpu, int nid)
{
/* fallback to node 0 */
- if (nid < 0 || nid >= MAX_NUMNODES)
+ if (nid < 0 || nid >= MAX_NUMNODES || numa_off)
nid = 0;
cpu_to_node_map[cpu] = nid;
+
+ /*
+ * We should set the numa node of cpu0 as soon as possible, because it
+ * has already been set up online before. cpu_to_node(0) will soon be
+ * called.
+ */
+ if (!cpu)
+ set_cpu_numa_node(cpu, nid);
}
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init early_cpu_to_node(int cpu)
+{
+ return cpu_to_node_map[cpu];
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+ return node_distance(from, to);
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
+ size_t align)
+{
+ int nid = early_cpu_to_node(cpu);
+
+ return memblock_virt_alloc_try_nid(size, align,
+ __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free_early(__pa(ptr), size);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+ unsigned long delta;
+ unsigned int cpu;
+ int rc;
+
+ /*
+ * Always reserve area for module percpu variables. That's
+ * what the legacy allocator did.
+ */
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+ PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+ pcpu_cpu_distance,
+ pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ panic("Failed to initialize percpu areas.");
+
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu)
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
/**
* numa_add_memblk - Set node id to memblk
* @nid: NUMA node ID of the new memblk
@@ -145,13 +202,13 @@
ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
if (ret < 0) {
- pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
+ pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
start, (end - 1), nid);
return ret;
}
node_set(nid, numa_nodes_parsed);
- pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
+ pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n",
start, (end - 1), nid);
return ret;
}
@@ -166,19 +223,18 @@
void *nd;
int tnid;
- pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
- nid, start_pfn << PAGE_SHIFT,
- (end_pfn << PAGE_SHIFT) - 1);
+ pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
nd = __va(nd_pa);
/* report and initialize */
- pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n",
nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
if (tnid != nid)
- pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);
+ pr_info("NODE_DATA(%d) on node %d\n", nid, tnid);
node_data[nid] = nd;
memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
@@ -235,8 +291,7 @@
numa_distance[i * numa_distance_cnt + j] = i == j ?
LOCAL_DISTANCE : REMOTE_DISTANCE;
- pr_debug("NUMA: Initialized distance table, cnt=%d\n",
- numa_distance_cnt);
+ pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt);
return 0;
}
@@ -257,20 +312,20 @@
void __init numa_set_distance(int from, int to, int distance)
{
if (!numa_distance) {
- pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
+ pr_warn_once("Warning: distance table not allocated yet\n");
return;
}
if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
from < 0 || to < 0) {
- pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+ pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
if ((u8)distance != distance ||
(from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
@@ -297,7 +352,7 @@
/* Check that valid nid is set to memblks */
for_each_memblock(memory, mblk)
if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
- pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
mblk->nid, mblk->base,
mblk->base + mblk->size - 1);
return -EINVAL;
@@ -335,8 +390,10 @@
if (ret < 0)
return ret;
- if (nodes_empty(numa_nodes_parsed))
+ if (nodes_empty(numa_nodes_parsed)) {
+ pr_info("No NUMA configuration found\n");
return -EINVAL;
+ }
ret = numa_register_nodes();
if (ret < 0)
@@ -344,10 +401,6 @@
setup_node_to_cpumask_map();
- /* init boot processor */
- cpu_to_node_map[0] = 0;
- map_cpu_to_node(0, 0);
-
return 0;
}
@@ -367,10 +420,8 @@
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
- else
- pr_info("No NUMA configuration found\n");
- pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
- 0LLU, PFN_PHYS(max_pfn) - 1);
+ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
+ 0LLU, PFN_PHYS(max_pfn) - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index ca6d268..8def55e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -139,4 +139,43 @@
__pgprot(0),
__pgprot(PTE_VALID));
}
-#endif
+#ifdef CONFIG_HIBERNATION
+/*
+ * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function
+ * is used to determine if a linear map page has been marked as not-valid by
+ * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit.
+ * This is based on kern_addr_valid(), which almost does what we need.
+ *
+ * Because this is only called on the kernel linear map, p?d_sect() implies
+ * p?d_present(). When debug_pagealloc is enabled, sections mappings are
+ * disabled.
+ */
+bool kernel_page_present(struct page *page)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr = (unsigned long)page_address(page);
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return false;
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return false;
+ if (pud_sect(*pud))
+ return true;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return false;
+ if (pmd_sect(*pmd))
+ return true;
+
+ pte = pte_offset_kernel(pmd, addr);
+ return pte_valid(*pte);
+}
+#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index ae11d4e..371c5f0 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,8 +26,6 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
-#include "mm.h"
-
static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9d37e96..352c73b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,6 +83,7 @@
*
* x0: Address of context pointer
*/
+ .pushsection ".idmap.text", "ax"
ENTRY(cpu_do_resume)
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
@@ -120,6 +121,7 @@
isb
ret
ENDPROC(cpu_do_resume)
+ .popsection
#endif
/*
@@ -134,17 +136,12 @@
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
-alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
- ret
- nop
- nop
- nop
-alternative_else
+alternative_if ARM64_WORKAROUND_CAVIUM_27456
ic iallu
dsb nsh
isb
+alternative_else_nop_endif
ret
-alternative_endif
ENDPROC(cpu_do_switch_mm)
.pushsection ".idmap.text", "ax"
@@ -181,6 +178,7 @@
* Initialise the processor for turning the MMU on. Return in x0 the
* value of the SCTLR_EL1 register.
*/
+ .pushsection ".idmap.text", "ax"
ENTRY(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
@@ -266,3 +264,4 @@
crval:
.word 0xfcffffff // clear
.word 0x34d5d91d // set
+ .popsection
diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S
index 28d0595..3b8bdcb 100644
--- a/arch/blackfin/kernel/ftrace-entry.S
+++ b/arch/blackfin/kernel/ftrace-entry.S
@@ -169,7 +169,7 @@
r0 = sp; /* unsigned long *parent */
r1 = [sp]; /* unsigned long self_addr */
# endif
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
r2 = fp; /* unsigned long frame_pointer */
# endif
r0 += 16; /* skip the 4 local regs on stack */
@@ -190,7 +190,7 @@
[--sp] = r1;
/* get original return address */
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
r0 = fp; /* Blackfin is sane, so omit this */
# endif
call _ftrace_return_to_handler;
diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c
index 095de0fa..8dad758 100644
--- a/arch/blackfin/kernel/ftrace.c
+++ b/arch/blackfin/kernel/ftrace.c
@@ -107,7 +107,7 @@
return;
if (ftrace_push_return_trace(*parent, self_addr, &trace.depth,
- frame_pointer) == -EBUSY)
+ frame_pointer, NULL) == -EBUSY)
return;
trace.func = self_addr;
diff --git a/arch/blackfin/mach-bf561/coreb.c b/arch/blackfin/mach-bf561/coreb.c
index 78ecb50..8a2543c 100644
--- a/arch/blackfin/mach-bf561/coreb.c
+++ b/arch/blackfin/mach-bf561/coreb.c
@@ -59,18 +59,7 @@
.name = "coreb",
.fops = &coreb_fops,
};
-
-static int __init bf561_coreb_init(void)
-{
- return misc_register(&coreb_dev);
-}
-module_init(bf561_coreb_init);
-
-static void __exit bf561_coreb_exit(void)
-{
- misc_deregister(&coreb_dev);
-}
-module_exit(bf561_coreb_exit);
+module_misc_device(coreb_dev);
MODULE_AUTHOR("Bas Vermeulen <bvermeul@blackstar.xs4all.nl>");
MODULE_DESCRIPTION("BF561 Core B Support");
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 29bd597..c702642 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -56,7 +56,7 @@
#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
#define task_thread_info(tsk) ((struct thread_info *) 0)
#endif
-#define free_thread_stack(ti) /* nothing */
+#define free_thread_stack(tsk) /* nothing */
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 92b7bc9..9273e03 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -796,7 +796,7 @@
* ACPI based hotplug CPU support
*/
#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
/*
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index eb9220c..9509cc7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -986,7 +986,7 @@
int cpu = smp_processor_id();
previous_current = curr_task(cpu);
- set_curr_task(cpu, current);
+ ia64_set_curr_task(cpu, current);
if ((p = strchr(current->comm, ' ')))
*p = '\0';
@@ -1360,14 +1360,14 @@
cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */
while (monarch_cpu != -1)
cpu_relax(); /* spin until last cpu leaves */
- set_curr_task(cpu, previous_current);
+ ia64_set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu]
= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
return;
}
}
}
- set_curr_task(cpu, previous_current);
+ ia64_set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
monarch_cpu = -1; /* This frees the slaves and previous monarchs */
}
@@ -1729,7 +1729,7 @@
NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
mprintk("Slave on cpu %d returning to normal service.\n", cpu);
- set_curr_task(cpu, previous_current);
+ ia64_set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
atomic_dec(&slaves);
return;
@@ -1756,7 +1756,7 @@
mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
atomic_dec(&monarchs);
- set_curr_task(cpu, previous_current);
+ ia64_set_curr_task(cpu, previous_current);
monarch_cpu = -1;
return;
}
@@ -1890,7 +1890,7 @@
PAGE_KERNEL)));
}
-static void ia64_mca_cmc_vector_adjust(void *dummy)
+static int ia64_mca_cpu_online(unsigned int cpu)
{
unsigned long flags;
@@ -1898,25 +1898,9 @@
if (!cmc_polling_enabled)
ia64_mca_cmc_vector_enable(NULL);
local_irq_restore(flags);
+ return 0;
}
-static int mca_cpu_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- ia64_mca_cmc_vector_adjust(NULL);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block mca_cpu_notifier = {
- .notifier_call = mca_cpu_callback
-};
-
/*
* ia64_mca_init
*
@@ -2111,15 +2095,13 @@
if (!mca_init)
return 0;
- register_hotcpu_notifier(&mca_cpu_notifier);
-
/* Setup the CMCI/P vector and handler */
setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL);
/* Unmask/enable the vector */
cmc_polling_enabled = 0;
- schedule_work(&cmc_enable_work);
-
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online",
+ ia64_mca_cpu_online, NULL);
IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
#ifdef CONFIG_ACPI
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index ec9cc1f..ddb8192 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -396,7 +396,7 @@
mach_max_dma_address = 0xffffffff;
mach_reset = amiga_reset;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
mach_beep = amiga_mksound;
#endif
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index cbd5991..97a3c38 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -211,7 +211,7 @@
arch_gettimeoffset = atari_gettimeoffset;
mach_reset = atari_reset;
mach_max_dma_address = 0xffffff;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
mach_beep = atari_mksound;
#endif
#ifdef CONFIG_HEARTBEAT
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 8f5b6f7..55be7e3 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -566,6 +566,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -584,6 +586,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 31bded9..365dda66 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -525,6 +525,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -543,6 +545,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0d7739e..ce3cbfd 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -546,6 +546,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -564,6 +566,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 2cbb5c4..8db496a 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -517,6 +517,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 96102a4..8314156 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -527,6 +527,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -545,6 +547,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 97d88f7..6600270 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -549,6 +549,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -567,6 +569,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index be25ef2..90abfe9 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -629,6 +629,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -647,6 +649,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index a008344..0d502c2 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -517,6 +517,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 6735a25..5930e91 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -517,6 +517,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 780c6e9..74e3ad8 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -540,6 +540,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -558,6 +560,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 44693cf..4ba8606 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -518,6 +518,8 @@
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -536,6 +538,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index ef0071d..c6f4972 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -519,6 +519,8 @@
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
@@ -537,6 +539,7 @@
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 5b8ec4d..50633c3 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -105,7 +105,7 @@
#ifdef CONFIG_M68K_L2_CACHE
void (*mach_l2_flush) (int);
#endif
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
void (*mach_beep)(unsigned int, unsigned int);
EXPORT_SYMBOL(mach_beep);
#endif
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 9202f82..58507ed 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -42,7 +42,7 @@
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/tracehook.h>
#include <asm/setup.h>
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 2f33a33..e468953 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -162,7 +162,7 @@
mach_halt = mac_poweroff;
mach_power_off = mac_poweroff;
mach_max_dma_address = 0xffffffff;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
mach_beep = mac_mksound;
#endif
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index fcb7f05..ea89a24 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -180,7 +180,7 @@
mach_reset = q40_reset;
mach_get_model = q40_get_model;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
mach_beep = q40_mksound;
#endif
#ifdef CONFIG_HEARTBEAT
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index fc7b48a..d57563c 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -63,7 +63,7 @@
return;
}
- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
if (err == -EBUSY) {
*parent = old;
return;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2638856..212ff92 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -65,6 +65,7 @@
select ARCH_CLOCKSOURCE_DATA
select HANDLE_DOMAIN_IRQ
select HAVE_EXIT_THREAD
+ select HAVE_REGS_AND_STACK_ACCESS_API
menu "Machine selection"
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index f0e314c..7f975b2 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -113,42 +113,6 @@
help
Add several files to the debugfs to test spinlock speed.
-if CPU_MIPSR6
-
-choice
- prompt "Compact branch policy"
- default MIPS_COMPACT_BRANCHES_OPTIMAL
-
-config MIPS_COMPACT_BRANCHES_NEVER
- bool "Never (force delay slot branches)"
- help
- Pass the -mcompact-branches=never flag to the compiler in order to
- force it to always emit branches with delay slots, and make no use
- of the compact branch instructions introduced by MIPSr6. This is
- useful if you suspect there may be an issue with compact branches in
- either the compiler or the CPU.
-
-config MIPS_COMPACT_BRANCHES_OPTIMAL
- bool "Optimal (use where beneficial)"
- help
- Pass the -mcompact-branches=optimal flag to the compiler in order for
- it to make use of compact branch instructions where it deems them
- beneficial, and use branches with delay slots elsewhere. This is the
- default compiler behaviour, and should be used unless you have a
- reason to choose otherwise.
-
-config MIPS_COMPACT_BRANCHES_ALWAYS
- bool "Always (force compact branches)"
- help
- Pass the -mcompact-branches=always flag to the compiler in order to
- force it to always emit compact branches, making no use of branch
- instructions with delay slots. This can result in more compact code
- which may be beneficial in some scenarios.
-
-endchoice
-
-endif # CPU_MIPSR6
-
config SCACHE_DEBUGFS
bool "L2 cache debugfs entries"
depends on DEBUG_FS
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index efd7a9d..598ab29 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -203,10 +203,6 @@
toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt)
cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER) += -mcompact-branches=never
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL) += -mcompact-branches=optimal
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS) += -mcompact-branches=always
-
#
# Firmware support
#
diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index 2e73784..cc3a1e3 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -96,7 +96,7 @@
struct clk *clk;
clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div);
- if (!clk)
+ if (IS_ERR(clk))
panic("failed to allocate %s clock structure", name);
return clk;
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 5a9b87b..c1eb1ff 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1619,6 +1619,12 @@
return -ENOMEM;
}
+ /*
+ * Clear the OF_POPULATED flag that was set by of_irq_init()
+ * so that all GPIO devices will be probed.
+ */
+ of_node_clear_flag(gpio_node, OF_POPULATED);
+
return 0;
}
/*
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index b31fbc9..37a932d 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -1059,7 +1059,7 @@
{
return of_platform_bus_probe(NULL, octeon_ids, NULL);
}
-device_initcall(octeon_publish_devices);
+arch_initcall(octeon_publish_devices);
MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
MODULE_LICENSE("GPL");
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4d457d60..256fe6f 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -380,29 +380,11 @@
return 0;
}
-static int octeon_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- octeon_update_boot_vector(cpu);
- break;
- case CPU_ONLINE:
- pr_info("Cpu %d online\n", cpu);
- break;
- case CPU_DEAD:
- break;
- }
-
- return NOTIFY_OK;
-}
-
static int register_cavium_notifier(void)
{
- hotcpu_notifier(octeon_cpu_callback, 0);
- return 0;
+ return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE,
+ "mips/cavium:prepare",
+ octeon_update_boot_vector, NULL);
}
late_initcall(register_cavium_notifier);
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index d7b9918..1910223 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -146,7 +146,25 @@
/*
* Find irq with highest priority
*/
- PTR_LA t1,cpu_mask_nr_tbl
+ # open coded PTR_LA t1, cpu_mask_nr_tbl
+#if (_MIPS_SZPTR == 32)
+ # open coded la t1, cpu_mask_nr_tbl
+ lui t1, %hi(cpu_mask_nr_tbl)
+ addiu t1, %lo(cpu_mask_nr_tbl)
+
+#endif
+#if (_MIPS_SZPTR == 64)
+ # open coded dla t1, cpu_mask_nr_tbl
+ .set push
+ .set noat
+ lui t1, %highest(cpu_mask_nr_tbl)
+ lui AT, %hi(cpu_mask_nr_tbl)
+ daddiu t1, t1, %higher(cpu_mask_nr_tbl)
+ daddiu AT, AT, %lo(cpu_mask_nr_tbl)
+ dsll t1, 32
+ daddu t1, t1, AT
+ .set pop
+#endif
1: lw t2,(t1)
nop
and t2,t0
@@ -195,7 +213,25 @@
/*
* Find irq with highest priority
*/
- PTR_LA t1,asic_mask_nr_tbl
+ # open coded PTR_LA t1,asic_mask_nr_tbl
+#if (_MIPS_SZPTR == 32)
+ # open coded la t1, asic_mask_nr_tbl
+ lui t1, %hi(asic_mask_nr_tbl)
+ addiu t1, %lo(asic_mask_nr_tbl)
+
+#endif
+#if (_MIPS_SZPTR == 64)
+ # open coded dla t1, asic_mask_nr_tbl
+ .set push
+ .set noat
+ lui t1, %highest(asic_mask_nr_tbl)
+ lui AT, %hi(asic_mask_nr_tbl)
+ daddiu t1, t1, %higher(asic_mask_nr_tbl)
+ daddiu AT, AT, %lo(asic_mask_nr_tbl)
+ dsll t1, 32
+ daddu t1, t1, AT
+ .set pop
+#endif
2: lw t2,(t1)
nop
and t2,t0
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 56584a6..83054f7 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -157,6 +157,7 @@
ldc1 $f28, THREAD_FPR28(\thread)
ldc1 $f30, THREAD_FPR30(\thread)
ctc1 \tmp, fcr31
+ .set pop
.endm
.macro fpu_restore_16odd thread
diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
index 0cf5ac1..8ff2cbdf 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
@@ -15,8 +15,8 @@
static inline bool __should_swizzle_bits(volatile void *a)
{
extern const bool octeon_should_swizzle_table[];
+ u64 did = ((u64)(uintptr_t)a >> 40) & 0xff;
- unsigned long did = ((unsigned long)a >> 40) & 0xff;
return octeon_should_swizzle_table[did];
}
@@ -29,7 +29,7 @@
#define __should_swizzle_bits(a) false
-static inline bool __should_swizzle_addr(unsigned long p)
+static inline bool __should_swizzle_addr(u64 p)
{
/* boot bus? */
return ((p >> 40) & 0xff) == 0;
diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
index 2f82bfa..c9f5769 100644
--- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
@@ -11,11 +11,13 @@
#define CP0_EBASE $15, 1
.macro kernel_entry_setup
+#ifdef CONFIG_SMP
mfc0 t0, CP0_EBASE
andi t0, t0, 0x3ff # CPUNum
beqz t0, 1f
# CPUs other than zero goto smp_bootstrap
j smp_bootstrap
+#endif /* CONFIG_SMP */
1:
.endm
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index 58e7874..4fafeef 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -458,10 +458,21 @@
static inline unsigned int mips_cm_max_vp_width(void)
{
extern int smp_num_siblings;
+ uint32_t cfg;
if (mips_cm_revision() >= CM_REV_CM3)
return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
+ if (mips_cm_present()) {
+ /*
+ * We presume that all cores in the system will have the same
+ * number of VP(E)s, and if that ever changes then this will
+ * need revisiting.
+ */
+ cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK;
+ return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1;
+ }
+
if (IS_ENABLED(CONFIG_SMP))
return smp_num_siblings;
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index def9d8d..7dd2dd4 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -660,8 +660,6 @@
#define MIPS_CONF7_IAR (_ULCAST_(1) << 10)
#define MIPS_CONF7_AR (_ULCAST_(1) << 16)
-/* FTLB probability bits for R6 */
-#define MIPS_CONF7_FTLBP_SHIFT (18)
/* WatchLo* register definitions */
#define MIPS_WATCHLO_IRW (_ULCAST_(0x7) << 0)
diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h
index 34c325c..70a4a2f 100644
--- a/arch/mips/include/asm/uprobes.h
+++ b/arch/mips/include/asm/uprobes.h
@@ -36,7 +36,6 @@
unsigned long resume_epc;
u32 insn[2];
u32 ixol[2];
- union mips_instruction orig_inst[MAX_UINSN_BYTES / 4];
};
struct arch_uprobe_task {
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index a88d442..dd31754 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -352,7 +352,12 @@
static int mips_ftlb_disabled;
static int mips_has_ftlb_configured;
-static int set_ftlb_enable(struct cpuinfo_mips *c, int enable);
+enum ftlb_flags {
+ FTLB_EN = 1 << 0,
+ FTLB_SET_PROB = 1 << 1,
+};
+
+static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags);
static int __init ftlb_disable(char *s)
{
@@ -371,8 +376,6 @@
return 1;
}
- back_to_back_c0_hazard();
-
config4 = read_c0_config4();
/* Check that FTLB has been disabled */
@@ -531,7 +534,7 @@
return 3;
}
-static int set_ftlb_enable(struct cpuinfo_mips *c, int enable)
+static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
{
unsigned int config;
@@ -542,33 +545,33 @@
case CPU_P6600:
/* proAptiv & related cores use Config6 to enable the FTLB */
config = read_c0_config6();
- /* Clear the old probability value */
- config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT);
- if (enable)
- /* Enable FTLB */
- write_c0_config6(config |
- (calculate_ftlb_probability(c)
- << MIPS_CONF6_FTLBP_SHIFT)
- | MIPS_CONF6_FTLBEN);
+
+ if (flags & FTLB_EN)
+ config |= MIPS_CONF6_FTLBEN;
else
- /* Disable FTLB */
- write_c0_config6(config & ~MIPS_CONF6_FTLBEN);
+ config &= ~MIPS_CONF6_FTLBEN;
+
+ if (flags & FTLB_SET_PROB) {
+ config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT);
+ config |= calculate_ftlb_probability(c)
+ << MIPS_CONF6_FTLBP_SHIFT;
+ }
+
+ write_c0_config6(config);
+ back_to_back_c0_hazard();
break;
case CPU_I6400:
- /* I6400 & related cores use Config7 to configure FTLB */
- config = read_c0_config7();
- /* Clear the old probability value */
- config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT);
- write_c0_config7(config | (calculate_ftlb_probability(c)
- << MIPS_CONF7_FTLBP_SHIFT));
- break;
+ /* There's no way to disable the FTLB */
+ if (!(flags & FTLB_EN))
+ return 1;
+ return 0;
case CPU_LOONGSON3:
/* Flush ITLB, DTLB, VTLB and FTLB */
write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB |
LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB);
/* Loongson-3 cores use Config6 to enable the FTLB */
config = read_c0_config6();
- if (enable)
+ if (flags & FTLB_EN)
/* Enable FTLB */
write_c0_config6(config & ~MIPS_CONF6_FTLBDIS);
else
@@ -788,6 +791,7 @@
PAGE_SIZE, config4);
/* Switch FTLB off */
set_ftlb_enable(c, 0);
+ mips_ftlb_disabled = 1;
break;
}
c->tlbsizeftlbsets = 1 <<
@@ -852,7 +856,7 @@
c->scache.flags = MIPS_CACHE_NOT_PRESENT;
/* Enable FTLB if present and not disabled */
- set_ftlb_enable(c, !mips_ftlb_disabled);
+ set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN);
ok = decode_config0(c); /* Read Config registers. */
BUG_ON(!ok); /* Arch spec violation! */
@@ -902,6 +906,9 @@
}
}
+ /* configure the FTLB write probability */
+ set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) | FTLB_SET_PROB);
+
mips_probe_watch_registers(c);
#ifndef CONFIG_MIPS_CPS
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 937c54b..30a3b75 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -382,8 +382,8 @@
if (unlikely(faulted))
goto out;
- if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
- == -EBUSY) {
+ if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
+ NULL) == -EBUSY) {
*parent_ra_addr = old_parent_ra;
return;
}
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 17326a9..dc0b296 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -142,9 +142,8 @@
PTR_LA k1, __r4k_wait
ori k0, 0x1f /* 32 byte rollback region */
xori k0, 0x1f
- bne k0, k1, 9f
+ bne k0, k1, \handler
MTC0 k0, CP0_EPC
-9:
.set pop
.endm
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index c3372ca..0a7e10b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1164,7 +1164,9 @@
regs->regs[31] = r31;
regs->cp0_epc = epc;
if (!used_math()) { /* First time FPU user. */
+ preempt_disable();
err = init_fpu();
+ preempt_enable();
set_used_math();
}
lose_fpu(1); /* Save FPU state for the emulator. */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 7429ad0..d2d0615 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -605,14 +605,14 @@
return -EOPNOTSUPP;
/* Avoid inadvertently triggering emulation */
- if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
- !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+ if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
+ !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
return -EOPNOTSUPP;
- if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+ if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
return -EOPNOTSUPP;
/* FR = 0 not supported in MIPS R6 */
- if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
+ if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
return -EOPNOTSUPP;
/* Proceed with the mode switch */
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 36cf8d6..0d57909 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -87,6 +87,13 @@
int x = boot_mem_map.nr_map;
int i;
+ /*
+ * If the region reaches the top of the physical address space, adjust
+ * the size slightly so that (start + size) doesn't overflow
+ */
+ if (start + size - 1 == (phys_addr_t)ULLONG_MAX)
+ --size;
+
/* Sanity check */
if (start + size < start) {
pr_warn("Trying to add an invalid memory region, skipped\n");
@@ -757,7 +764,6 @@
device_tree_init();
sparse_init();
plat_swiotlb_setup();
- paging_init();
dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
/* Tell bootmem about cma reserved memblock section */
@@ -870,6 +876,7 @@
prefill_possible_map();
cpu_cache_init();
+ paging_init();
}
unsigned long kernelsp[NR_CPUS];
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e9d9fc6..6183ad8 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -513,7 +513,7 @@
* in which case the CPC will refuse to power down the core.
*/
do {
- mips_cm_lock_other(core, vpe_id);
+ mips_cm_lock_other(core, 0);
mips_cpc_lock_other(core);
stat = read_cpc_co_stat_conf();
stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f95f094..b0baf48 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -322,6 +322,9 @@
cpumask_set_cpu(cpu, &cpu_coherent_mask);
notify_cpu_starting(cpu);
+ cpumask_set_cpu(cpu, &cpu_callin_map);
+ synchronise_count_slave(cpu);
+
set_cpu_online(cpu, true);
set_cpu_sibling_map(cpu);
@@ -329,10 +332,6 @@
calculate_cpu_foreign_map();
- cpumask_set_cpu(cpu, &cpu_callin_map);
-
- synchronise_count_slave(cpu);
-
/*
* irq will be enabled in ->smp_finish(), enabling it too early
* is dangerous.
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index 8452d93..4c7c155 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -157,7 +157,6 @@
int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
{
struct uprobe_task *utask = current->utask;
- union mips_instruction insn;
/*
* Now find the EPC where to resume after the breakpoint has been
@@ -168,10 +167,10 @@
unsigned long epc;
epc = regs->cp0_epc;
- __compute_return_epc_for_insn(regs, insn);
+ __compute_return_epc_for_insn(regs,
+ (union mips_instruction) aup->insn[0]);
aup->resume_epc = regs->cp0_epc;
}
-
utask->autask.saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
regs->cp0_epc = current->utask->xol_vaddr;
@@ -222,7 +221,7 @@
return NOTIFY_DONE;
switch (val) {
- case DIE_BREAK:
+ case DIE_UPROBE:
if (uprobe_pre_sstep_notifier(regs))
return NOTIFY_STOP;
break;
@@ -257,7 +256,7 @@
ra = regs->regs[31];
/* Replace the return address with the trampoline address */
- regs->regs[31] = ra;
+ regs->regs[31] = trampoline_vaddr;
return ra;
}
@@ -280,24 +279,6 @@
return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
}
-/**
- * set_orig_insn - Restore the original instruction.
- * @mm: the probed process address space.
- * @auprobe: arch specific probepoint information.
- * @vaddr: the virtual address to insert the opcode.
- *
- * For mm @mm, restore the original opcode (opcode) at @vaddr.
- * Return 0 (success) or a negative errno.
- *
- * This overrides the weak version in kernel/events/uprobes.c.
- */
-int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
- unsigned long vaddr)
-{
- return uprobe_write_opcode(mm, vaddr,
- *(uprobe_opcode_t *)&auprobe->orig_inst[0].word);
-}
-
void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len)
{
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 9abe447..f9dbfb1 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -39,16 +39,16 @@
static void __init init_vdso_image(struct mips_vdso_image *image)
{
unsigned long num_pages, i;
+ unsigned long data_pfn;
BUG_ON(!PAGE_ALIGNED(image->data));
BUG_ON(!PAGE_ALIGNED(image->size));
num_pages = image->size / PAGE_SIZE;
- for (i = 0; i < num_pages; i++) {
- image->mapping.pages[i] =
- virt_to_page(image->data + (i * PAGE_SIZE));
- }
+ data_pfn = __phys_to_pfn(__pa_symbol(image->data));
+ for (i = 0; i < num_pages; i++)
+ image->mapping.pages[i] = pfn_to_page(data_pfn + i);
}
static int __init init_vdso(void)
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 2fec6f7..99aab9f 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -677,7 +677,7 @@
play_dead_at_ckseg1(state_addr);
}
-void loongson3_disable_clock(int cpu)
+static int loongson3_disable_clock(unsigned int cpu)
{
uint64_t core_id = cpu_data[cpu].core;
uint64_t package_id = cpu_data[cpu].package;
@@ -688,9 +688,10 @@
if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
}
+ return 0;
}
-void loongson3_enable_clock(int cpu)
+static int loongson3_enable_clock(unsigned int cpu)
{
uint64_t core_id = cpu_data[cpu].core;
uint64_t package_id = cpu_data[cpu].package;
@@ -701,34 +702,15 @@
if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
}
-}
-
-#define CPU_POST_DEAD_FROZEN (CPU_POST_DEAD | CPU_TASKS_FROZEN)
-static int loongson3_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_POST_DEAD:
- case CPU_POST_DEAD_FROZEN:
- pr_info("Disable clock for CPU#%d\n", cpu);
- loongson3_disable_clock(cpu);
- break;
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_info("Enable clock for CPU#%d\n", cpu);
- loongson3_enable_clock(cpu);
- break;
- }
-
- return NOTIFY_OK;
+ return 0;
}
static int register_loongson3_notifier(void)
{
- hotcpu_notifier(loongson3_cpu_callback, 0);
- return 0;
+ return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE,
+ "mips/loongson:prepare",
+ loongson3_enable_clock,
+ loongson3_disable_clock);
}
early_initcall(register_loongson3_notifier);
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 72a4642..4a094f7 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -298,5 +298,6 @@
/* Set EPC to return to post-branch instruction */
xcp->cp0_epc = current->thread.bd_emu_cont_pc;
pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc);
+ MIPS_FPU_EMU_INC_STATS(ds_emul);
return true;
}
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index cd72805..fa7d8d3 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -800,7 +800,7 @@
* If address-based cache ops don't require an SMP call, then
* use them exclusively for small flushes.
*/
- size = start - end;
+ size = end - start;
cache_size = icache_size;
if (!cpu_has_ic_fills_f_dc) {
size *= 2;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index a5509e7..72f7478 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -261,7 +261,6 @@
{
struct maar_config cfg[BOOT_MEM_MAP_MAX];
unsigned i, num_configured, num_cfg = 0;
- phys_addr_t skip;
for (i = 0; i < boot_mem_map.nr_map; i++) {
switch (boot_mem_map.map[i].type) {
@@ -272,14 +271,14 @@
continue;
}
- skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff);
-
+ /* Round lower up */
cfg[num_cfg].lower = boot_mem_map.map[i].addr;
- cfg[num_cfg].lower += skip;
+ cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff;
- cfg[num_cfg].upper = cfg[num_cfg].lower;
- cfg[num_cfg].upper += boot_mem_map.map[i].size - 1;
- cfg[num_cfg].upper -= skip;
+ /* Round upper down */
+ cfg[num_cfg].upper = boot_mem_map.map[i].addr +
+ boot_mem_map.map[i].size;
+ cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1;
cfg[num_cfg].attrs = MIPS_MAAR_S;
num_cfg++;
@@ -441,6 +440,9 @@
#ifdef CONFIG_HIGHMEM
unsigned long tmp;
+ if (cpu_has_dc_aliases)
+ return;
+
for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
struct page *page = pfn_to_page(tmp);
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index ec5b216..7e7364b 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -39,6 +39,9 @@
#include <linux/console.h>
#endif
+#define ROCIT_CONFIG_GEN0 0x1f403000
+#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7)
+
extern void malta_be_init(void);
extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
@@ -107,6 +110,8 @@
static int __init plat_enable_iocoherency(void)
{
int supported = 0;
+ u32 cfg;
+
if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) {
if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) {
BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN;
@@ -129,7 +134,8 @@
} else if (mips_cm_numiocu() != 0) {
/* Nothing special needs to be done to enable coherency */
pr_info("CMP IOCU detected\n");
- if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) {
+ cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0));
+ if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) {
pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n");
return 0;
}
diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
index 3e411c6..f362b22 100755
--- a/arch/nios2/boot/dts/10m50_devboard.dts
+++ b/arch/nios2/boot/dts/10m50_devboard.dts
@@ -83,6 +83,7 @@
fifo-size = <32>;
reg-io-width = <4>;
reg-shift = <2>;
+ tx-threshold = <16>;
};
sysid: sysid@18001528 {
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index a828a0a..5a5506a 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -48,7 +48,7 @@
return;
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- 0 ) == -EBUSY)
+ 0, NULL) == -EBUSY)
return;
/* activate parisc_return_to_handler() as return point */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index cc52d97..a95639b 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -593,7 +593,8 @@
if (!ftrace_graph_entry(&trace))
goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+ NULL) == -EBUSY)
goto out;
parent = return_hooker;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 7d95bc4..c491f2c 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -369,44 +369,34 @@
}
#ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
/* We don't touch CPU 0 map, it's allocated at aboot and kept
* around forever
*/
if (cpu == boot_cpuid)
- return NOTIFY_OK;
+ return 0;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
- stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
- kfree(stale_map[cpu]);
- stale_map[cpu] = NULL;
-
- /* We also clear the cpu_vm_mask bits of CPUs going away */
- clear_tasks_mm_cpumask(cpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
+ pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ return 0;
}
-static struct notifier_block mmu_context_cpu_nb = {
- .notifier_call = mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu == boot_cpuid)
+ return 0;
+
+ pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+
+ /* We also clear the cpu_vm_mask bits of CPUs going away */
+ clear_tasks_mm_cpumask(cpu);
+#endif
+ return 0;
+}
#endif /* CONFIG_SMP */
@@ -469,7 +459,9 @@
#else
stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
- register_cpu_notifier(&mmu_context_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif
printk(KERN_INFO
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 834868b..366e4f5 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -852,37 +852,33 @@
#ifdef CONFIG_PPC64
#ifdef CONFIG_HOTPLUG_CPU
-static int smp_core99_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static unsigned int smp_core99_host_open;
+
+static int smp_core99_cpu_prepare(unsigned int cpu)
{
int rc;
- switch(action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- /* Open i2c bus if it was used for tb sync */
- if (pmac_tb_clock_chip_host) {
- rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
- if (rc) {
- pr_err("Failed to open i2c bus for time sync\n");
- return notifier_from_errno(rc);
- }
+ /* Open i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host && !smp_core99_host_open) {
+ rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
+ if (rc) {
+ pr_err("Failed to open i2c bus for time sync\n");
+ return notifier_from_errno(rc);
}
- break;
- case CPU_ONLINE:
- case CPU_UP_CANCELED:
- /* Close i2c bus if it was used for tb sync */
- if (pmac_tb_clock_chip_host)
- pmac_i2c_close(pmac_tb_clock_chip_host);
- break;
- default:
- break;
+ smp_core99_host_open = 1;
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block smp_core99_cpu_nb = {
- .notifier_call = smp_core99_cpu_notify,
-};
+static int smp_core99_cpu_online(unsigned int cpu)
+{
+ /* Close i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host && smp_core99_host_open) {
+ pmac_i2c_close(pmac_tb_clock_chip_host);
+ smp_core99_host_open = 0;
+ }
+ return 0;
+}
#endif /* CONFIG_HOTPLUG_CPU */
static void __init smp_core99_bringup_done(void)
@@ -902,7 +898,11 @@
g5_phy_disable_cpu1();
}
#ifdef CONFIG_HOTPLUG_CPU
- register_cpu_notifier(&smp_core99_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE,
+ "powerpc/pmac:prepare", smp_core99_cpu_prepare,
+ NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online",
+ smp_core99_cpu_online, NULL);
#endif
if (ppc_md.progress)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index bc0c91e..38a5c65 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -124,6 +124,13 @@
r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
}
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+ unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+
+ return (resource_flags & flags) == flags;
+}
+
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
{
phb->ioda.pe_array[pe_no].phb = phb;
@@ -2871,7 +2878,7 @@
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_m64(phb, res)) {
+ if (!pnv_pci_is_m64_flags(res->flags)) {
dev_warn(&pdev->dev, "Don't support SR-IOV with"
" non M64 VF BAR%d: %pR. \n",
i, res);
@@ -3096,7 +3103,7 @@
* alignment for any 64-bit resource, PCIe doesn't care and
* bridges only do 64-bit prefetchable anyway.
*/
- if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
+ if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0f7bfeb..60a8a4e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -209,7 +209,8 @@
/* Only trace if the calling function expects to. */
if (!ftrace_graph_entry(&trace))
goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+ NULL) == -EBUSY)
goto out;
parent = (unsigned long) return_to_handler;
out:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a58bca6..cbb73fa 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -740,28 +740,21 @@
put_task_struct(tsk);
}
-static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int pfault_cpu_dead(unsigned int cpu)
{
struct thread_struct *thread, *next;
struct task_struct *tsk;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DEAD:
- spin_lock_irq(&pfault_lock);
- list_for_each_entry_safe(thread, next, &pfault_list, list) {
- thread->pfault_wait = 0;
- list_del(&thread->list);
- tsk = container_of(thread, struct task_struct, thread);
- wake_up_process(tsk);
- put_task_struct(tsk);
- }
- spin_unlock_irq(&pfault_lock);
- break;
- default:
- break;
+ spin_lock_irq(&pfault_lock);
+ list_for_each_entry_safe(thread, next, &pfault_list, list) {
+ thread->pfault_wait = 0;
+ list_del(&thread->list);
+ tsk = container_of(thread, struct task_struct, thread);
+ wake_up_process(tsk);
+ put_task_struct(tsk);
}
- return NOTIFY_OK;
+ spin_unlock_irq(&pfault_lock);
+ return 0;
}
static int __init pfault_irq_init(void)
@@ -775,7 +768,8 @@
if (rc)
goto out_pfault;
irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
- hotcpu_notifier(pfault_cpu_notify, 0);
+ cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+ NULL, pfault_cpu_dead);
return 0;
out_pfault:
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index caea2c4..1d159ce 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -60,7 +60,7 @@
" movco.l %0, @%3 \n" \
" bf 1b \n" \
" synco \n" \
- : "=&z" (temp), "=&z" (res) \
+ : "=&z" (temp), "=&r" (res) \
: "r" (i), "r" (&v->counter) \
: "t"); \
\
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index 839612c..0d3637c 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -122,32 +122,16 @@
__raw_writel(STBCR_RESET, STBCR_REG(cpu));
}
-static int
-shx3_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int shx3_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)hcpu;
-
- switch (action) {
- case CPU_UP_PREPARE:
- shx3_update_boot_vector(cpu);
- break;
- case CPU_ONLINE:
- pr_info("CPU %u is now online\n", cpu);
- break;
- case CPU_DEAD:
- break;
- }
-
- return NOTIFY_OK;
+ shx3_update_boot_vector(cpu);
+ return 0;
}
-static struct notifier_block shx3_cpu_notifier = {
- .notifier_call = shx3_cpu_callback,
-};
-
static int register_shx3_cpu_notifier(void)
{
- register_hotcpu_notifier(&shx3_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_SH_SH3X_PREPARE, "sh/shx3:prepare",
+ shx3_cpu_prepare, NULL);
return 0;
}
late_initcall(register_shx3_cpu_notifier);
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 38993e0..95eccd4 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -382,7 +382,7 @@
return;
}
- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
if (err == -EBUSY) {
__raw_writel(old, parent);
return;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 59b0960..f5d60f1 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -56,7 +56,6 @@
def_bool 64BIT
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index 3192a8e..62755a3 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -9,6 +9,10 @@
void _mcount(void);
#endif
+#endif /* CONFIG_MCOUNT */
+
+#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 8c2a8c9..c1263fc 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -25,6 +25,7 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index 26d9e77..ce2233f 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -43,6 +43,7 @@
int hard_smp_processor_id(void);
#define raw_smp_processor_id() (current_thread_info()->cpu)
+void smp_fill_in_cpu_possible_map(void);
void smp_fill_in_sib_core_maps(void);
void cpu_play_dead(void);
@@ -72,6 +73,7 @@
#define smp_fill_in_sib_core_maps() do { } while (0)
#define smp_fetch_global_regs() do { } while (0)
#define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)
#endif /* !(CONFIG_SMP) */
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 0a2d2dd..6bcff69 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -131,7 +131,7 @@
return parent + 8UL;
if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
- frame_pointer) == -EBUSY)
+ frame_pointer, NULL) == -EBUSY)
return parent + 8UL;
trace.func = self_addr;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 599f120..6b7331d 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -31,6 +31,7 @@
#include <linux/initrd.h>
#include <linux/module.h>
#include <linux/start_kernel.h>
+#include <linux/bootmem.h>
#include <asm/io.h>
#include <asm/processor.h>
@@ -50,6 +51,8 @@
#include <asm/elf.h>
#include <asm/mdesc.h>
#include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
#ifdef CONFIG_IP_PNP
#include <net/ipconfig.h>
@@ -590,6 +593,22 @@
pause_patch();
}
+void __init alloc_irqstack_bootmem(void)
+{
+ unsigned int i, node;
+
+ for_each_possible_cpu(i) {
+ node = cpu_to_node(i);
+
+ softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+ THREAD_SIZE,
+ THREAD_SIZE, 0);
+ hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+ THREAD_SIZE,
+ THREAD_SIZE, 0);
+ }
+}
+
void __init setup_arch(char **cmdline_p)
{
/* Initialize PROM console and command line. */
@@ -650,6 +669,13 @@
paging_init();
init_sparc64_elf_hwcap();
+ smp_fill_in_cpu_possible_map();
+ /*
+ * Once the OF device tree and MDESC have been setup and nr_cpus has
+ * been parsed, we know the list of possible cpus. Therefore we can
+ * allocate the IRQ stacks.
+ */
+ alloc_irqstack_bootmem();
}
extern int stop_a_enabled;
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index fb30e7c..e80e6ba 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -352,9 +352,7 @@
preempt_disable();
cpu = smp_processor_id();
- /* Invoke the CPU_STARTING notifier callbacks */
notify_cpu_starting(cpu);
-
arch_cpu_pre_online(arg);
/* Set the CPU in the cpu_online_mask */
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8a6151a..d3035ba 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1227,6 +1227,20 @@
xcall_deliver_impl = hypervisor_xcall_deliver;
}
+void __init smp_fill_in_cpu_possible_map(void)
+{
+ int possible_cpus = num_possible_cpus();
+ int i;
+
+ if (possible_cpus > nr_cpu_ids)
+ possible_cpus = nr_cpu_ids;
+
+ for (i = 0; i < possible_cpus; i++)
+ set_cpu_possible(i, true);
+ for (; i < NR_CPUS; i++)
+ set_cpu_possible(i, false);
+}
+
void smp_fill_in_sib_core_maps(void)
{
unsigned int i;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index e16fdd2..3f291d8 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -484,6 +484,7 @@
tsb_grow(mm, MM_TSB_BASE, mm_rss);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+ mm_rss *= REAL_HPAGE_PER_HPAGE;
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 65457c9..7ac6b62 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1160,7 +1160,7 @@
return numa_latency[from][to];
}
-static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
int i;
@@ -1173,8 +1173,8 @@
return i;
}
-static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
- int index)
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+ u64 grp, int index)
{
u64 arc;
@@ -2081,7 +2081,6 @@
{
unsigned long end_pfn, shift, phys_base;
unsigned long real_end, i;
- int node;
setup_page_offset();
@@ -2250,21 +2249,6 @@
/* Setup bootmem... */
last_valid_pfn = end_pfn = bootmem_init(phys_base);
- /* Once the OF device tree and MDESC have been setup, we know
- * the list of possible cpus. Therefore we can allocate the
- * IRQ stacks.
- */
- for_each_possible_cpu(i) {
- node = cpu_to_node(i);
-
- softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
- THREAD_SIZE,
- THREAD_SIZE, 0);
- hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
- THREAD_SIZE,
- THREAD_SIZE, 0);
- }
-
kernel_physical_mapping_init();
{
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 3659d37..c56a195 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -174,10 +174,25 @@
return;
if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
- if (pmd_val(pmd) & _PAGE_PMD_HUGE)
- mm->context.thp_pte_count++;
- else
- mm->context.thp_pte_count--;
+ /*
+ * Note that this routine only sets pmds for THP pages.
+ * Hugetlb pages are handled elsewhere. We need to check
+ * for huge zero page. Huge zero pages are like hugetlb
+ * pages in that there is no RSS, but there is the need
+ * for TSB entries. So, huge zero page counts go into
+ * hugetlb_pte_count.
+ */
+ if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+ if (is_huge_zero_page(pmd_page(pmd)))
+ mm->context.hugetlb_pte_count++;
+ else
+ mm->context.thp_pte_count++;
+ } else {
+ if (is_huge_zero_page(pmd_page(orig)))
+ mm->context.hugetlb_pte_count--;
+ else
+ mm->context.thp_pte_count--;
+ }
/* Do not try to allocate the TSB hash table if we
* don't have one already. We have various locks held
@@ -204,6 +219,9 @@
}
}
+/*
+ * This routine is only called when splitting a THP
+ */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
@@ -213,6 +231,15 @@
set_pmd_at(vma->vm_mm, address, pmdp, entry);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+ /*
+ * set_pmd_at() will not be called in a way to decrement
+ * thp_pte_count when splitting a THP, so do it now.
+ * Sanity check pmd before doing the actual decrement.
+ */
+ if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+ !is_huge_zero_page(pmd_page(entry)))
+ (vma->vm_mm)->context.thp_pte_count--;
}
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 6725ed4..f2b7711 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -469,8 +469,10 @@
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
+ unsigned long mm_rss = get_mm_rss(mm);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- unsigned long total_huge_pte_count;
+ unsigned long saved_hugetlb_pte_count;
+ unsigned long saved_thp_pte_count;
#endif
unsigned int i;
@@ -483,10 +485,12 @@
* will re-increment the counters as the parent PTEs are
* copied into the child address space.
*/
- total_huge_pte_count = mm->context.hugetlb_pte_count +
- mm->context.thp_pte_count;
+ saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+ saved_thp_pte_count = mm->context.thp_pte_count;
mm->context.hugetlb_pte_count = 0;
mm->context.thp_pte_count = 0;
+
+ mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
#endif
/* copy_mm() copies over the parent's mm_struct before calling
@@ -499,11 +503,13 @@
/* If this is fork, inherit the parent's TSB size. We would
* grow it to that size on the first page fault anyways.
*/
- tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+ tsb_grow(mm, MM_TSB_BASE, mm_rss);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if (unlikely(total_huge_pte_count))
- tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+ if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+ tsb_grow(mm, MM_TSB_HUGE,
+ (saved_hugetlb_pte_count + saved_thp_pte_count) *
+ REAL_HPAGE_PER_HPAGE);
#endif
if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
index 4a57208..b827a41 100644
--- a/arch/tile/kernel/ftrace.c
+++ b/arch/tile/kernel/ftrace.c
@@ -184,7 +184,7 @@
*parent = return_hooker;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer);
+ frame_pointer, NULL);
if (err == -EBUSY) {
*parent = old;
return;
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 2d0266d..3282787 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -175,27 +175,4 @@
.name = "watchdog",
.fops = &harddog_fops,
};
-
-static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n";
-
-static int __init harddog_init(void)
-{
- int ret;
-
- ret = misc_register(&harddog_miscdev);
-
- if (ret)
- return ret;
-
- printk(banner);
-
- return 0;
-}
-
-static void __exit harddog_exit(void)
-{
- misc_deregister(&harddog_miscdev);
-}
-
-module_init(harddog_init);
-module_exit(harddog_exit);
+module_misc_device(harddog_miscdev);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2a1f0ce..9b2d50a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_EBPF_JIT if X86_64
+ select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
@@ -109,7 +110,6 @@
select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64
select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
@@ -157,6 +157,7 @@
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_DEV_DMA_OPS if X86_64
@@ -549,6 +550,18 @@
Say Y here if you have a Quark based system such as the Arduino
compatible Intel Galileo.
+config MLX_PLATFORM
+ tristate "Mellanox Technologies platform support"
+ depends on X86_64
+ depends on X86_EXTENDED_PLATFORM
+ ---help---
+ This option enables system support for the Mellanox Technologies
+ platform.
+
+ Say Y here if you are building a kernel for Mellanox system.
+
+ Otherwise, say N.
+
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
@@ -705,7 +718,6 @@
config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks"
depends on PARAVIRT && SMP
- select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
---help---
Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly
@@ -718,7 +730,7 @@
config QUEUED_LOCK_STAT
bool "Paravirt queued spinlock statistics"
- depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
+ depends on PARAVIRT_SPINLOCKS && DEBUG_FS
---help---
Enable the collection of statistical data on the slowpath
behavior of paravirtualized queued spinlocks and report
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 94dd4a3..cc69e37 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -29,22 +29,11 @@
static void setup_boot_services##bits(struct efi_config *c) \
{ \
efi_system_table_##bits##_t *table; \
- efi_boot_services_##bits##_t *bt; \
\
table = (typeof(table))sys_table; \
\
+ c->boot_services = table->boottime; \
c->text_output = table->con_out; \
- \
- bt = (typeof(bt))(unsigned long)(table->boottime); \
- \
- c->allocate_pool = bt->allocate_pool; \
- c->allocate_pages = bt->allocate_pages; \
- c->get_memory_map = bt->get_memory_map; \
- c->free_pool = bt->free_pool; \
- c->free_pages = bt->free_pages; \
- c->locate_handle = bt->locate_handle; \
- c->handle_protocol = bt->handle_protocol; \
- c->exit_boot_services = bt->exit_boot_services; \
}
BOOT_SERVICES(32);
BOOT_SERVICES(64);
@@ -286,29 +275,6 @@
}
}
-static void find_bits(unsigned long mask, u8 *pos, u8 *size)
-{
- u8 first, len;
-
- first = 0;
- len = 0;
-
- if (mask) {
- while (!(mask & 0x1)) {
- mask = mask >> 1;
- first++;
- }
-
- while (mask & 0x1) {
- mask = mask >> 1;
- len++;
- }
- }
-
- *pos = first;
- *size = len;
-}
-
static efi_status_t
__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
{
@@ -578,7 +544,7 @@
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
u32 *handles = (u32 *)uga_handle;;
- efi_status_t status;
+ efi_status_t status = EFI_INVALID_PARAMETER;
int i;
first_uga = NULL;
@@ -623,7 +589,7 @@
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
u64 *handles = (u64 *)uga_handle;;
- efi_status_t status;
+ efi_status_t status = EFI_INVALID_PARAMETER;
int i;
first_uga = NULL;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1038524..fd0b6a2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -82,7 +82,7 @@
/* Relocate efi_config->call() */
leal efi32_config(%esi), %eax
- add %esi, 88(%eax)
+ add %esi, 32(%eax)
pushl %eax
call make_boot_params
@@ -108,7 +108,7 @@
/* Relocate efi_config->call() */
leal efi32_config(%esi), %eax
- add %esi, 88(%eax)
+ add %esi, 32(%eax)
pushl %eax
2:
call efi_main
@@ -264,7 +264,7 @@
#ifdef CONFIG_EFI_STUB
.data
efi32_config:
- .fill 11,8,0
+ .fill 4,8,0
.long efi_call_phys
.long 0
.byte 0
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 0d80a7a..efdfba2 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -265,7 +265,7 @@
/*
* Relocate efi_config->call().
*/
- addq %rbp, efi64_config+88(%rip)
+ addq %rbp, efi64_config+32(%rip)
movq %rax, %rdi
call make_boot_params
@@ -285,7 +285,7 @@
* Relocate efi_config->call().
*/
movq efi_config(%rip), %rax
- addq %rbp, 88(%rax)
+ addq %rbp, 32(%rax)
2:
movq efi_config(%rip), %rdi
call efi_main
@@ -457,14 +457,14 @@
#ifdef CONFIG_EFI_MIXED
.global efi32_config
efi32_config:
- .fill 11,8,0
+ .fill 4,8,0
.quad efi64_thunk
.byte 0
#endif
.global efi64_config
efi64_config:
- .fill 11,8,0
+ .fill 4,8,0
.quad efi_call
.byte 1
#endif /* CONFIG_EFI_STUB */
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1433f6b..bdd9cc5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,13 +31,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
- unsigned long top_of_stack =
- (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
- return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
-
#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible inline void enter_from_user_mode(void)
@@ -71,7 +64,7 @@
{
u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
bool emulated = false;
u32 work;
@@ -173,18 +166,17 @@
/* Disable IRQs and retry */
local_irq_disable();
- cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+ cached_flags = READ_ONCE(current_thread_info()->flags);
if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
break;
-
}
}
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -209,7 +201,7 @@
* special case only applies after poking regs and before the
* very next return to user mode.
*/
- ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
@@ -247,7 +239,7 @@
*/
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
u32 cached_flags = READ_ONCE(ti->flags);
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -270,7 +262,7 @@
#ifdef CONFIG_X86_64
__visible void do_syscall_64(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned long nr = regs->orig_ax;
enter_from_user_mode();
@@ -303,11 +295,11 @@
*/
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned int nr = (unsigned int)regs->orig_ax;
#ifdef CONFIG_IA32_EMULATION
- ti->status |= TS_COMPAT;
+ current->thread.status |= TS_COMPAT;
#endif
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b56666..b75a8bc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,34 +204,70 @@
POP_GS_EX
.endm
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+ /*
+ * Save callee-saved registers
+ * This must match the order in struct inactive_task_frame
+ */
+ pushl %ebp
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+
+ /* switch stack */
+ movl %esp, TASK_threadsp(%eax)
+ movl TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+ movl TASK_stack_canary(%edx), %ebx
+ movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+ /* restore callee-saved registers */
+ popl %esi
+ popl %edi
+ popl %ebx
+ popl %ebp
+
+ jmp __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
+ */
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
popl %eax
+ testl %ebx, %ebx
+ jnz 1f /* kernel threads are uncommon */
+
+2:
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
jmp restore_all
-END(ret_from_fork)
-ENTRY(ret_from_kernel_thread)
- pushl %eax
- call schedule_tail
- popl %eax
- movl PT_EBP(%esp), %eax
- call *PT_EBX(%esp)
- movl $0, PT_EAX(%esp)
-
+ /* kernel thread */
+1: movl %edi, %eax
+ call *%ebx
/*
- * Kernel threads return to userspace as if returning from a syscall.
- * We should check whether anything actually uses this path and, if so,
- * consider switching it over to ret_from_fork.
+ * A kernel thread is allowed to return here after successfully
+ * calling do_execve(). Exit to userspace to complete the execve()
+ * syscall.
*/
- movl %esp, %eax
- call syscall_return_slowpath
- jmp restore_all
-ENDPROC(ret_from_kernel_thread)
+ movl $0, PT_EAX(%esp)
+ jmp 2b
+END(ret_from_fork)
/*
* Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d172c61..fee1d95 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -179,7 +179,8 @@
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
*/
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ movq PER_CPU_VAR(current_task), %r11
+ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
jnz entry_SYSCALL64_slow_path
entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@
*/
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ movq PER_CPU_VAR(current_task), %r11
+ testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
jnz 1f
LOCKDEP_SYS_EXIT
@@ -351,8 +353,7 @@
jmp entry_SYSCALL64_slow_path
1:
- /* Called from C */
- jmp *%rax /* called from C */
+ jmp *%rax /* Called from C */
END(stub_ptregs_64)
.macro ptregs_stub func
@@ -369,41 +370,73 @@
#include <asm/syscalls_64.h>
/*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+ /*
+ * Save callee-saved registers
+ * This must match the order in inactive_task_frame
+ */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ /* switch stack */
+ movq %rsp, TASK_threadsp(%rdi)
+ movq TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+ movq TASK_stack_canary(%rsi), %rbx
+ movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+ /* restore callee-saved registers */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+
+ jmp __switch_to
+END(__switch_to_asm)
+
+/*
* A newly forked process directly context switches into this address.
*
- * rdi: prev task we switched from
+ * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
*/
ENTRY(ret_from_fork)
- LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+ movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
- testb $3, CS(%rsp) /* from kernel_thread? */
- jnz 1f
+ testq %rbx, %rbx /* from kernel_thread? */
+ jnz 1f /* kernel threads are uncommon */
- /*
- * We came from kernel_thread. This code path is quite twisted, and
- * someone should clean it up.
- *
- * copy_thread_tls stashes the function pointer in RBX and the
- * parameter to be passed in RBP. The called function is permitted
- * to call do_execve and thereby jump to user mode.
- */
- movq RBP(%rsp), %rdi
- call *RBX(%rsp)
- movl $0, RAX(%rsp)
-
- /*
- * Fall through as though we're exiting a syscall. This makes a
- * twisted sort of sense if we just called do_execve.
- */
-
-1:
+2:
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
+
+1:
+ /* kernel thread */
+ movq %r12, %rdi
+ call *%rbx
+ /*
+ * A kernel thread is allowed to return here after successfully
+ * calling do_execve(). Exit to userspace to complete the execve()
+ * syscall.
+ */
+ movq $0, RAX(%rsp)
+ jmp 2b
END(ret_from_fork)
/*
@@ -555,27 +588,69 @@
#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
- pushq %rax
- pushq %rdi
+ /*
+ * We are running with user GSBASE. All GPRs contain their user
+ * values. We have a percpu ESPFIX stack that is eight slots
+ * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom
+ * of the ESPFIX stack.
+ *
+ * We clobber RAX and RDI in this code. We stash RDI on the
+ * normal stack and RAX on the ESPFIX stack.
+ *
+ * The ESPFIX stack layout we set up looks like this:
+ *
+ * --- top of ESPFIX stack ---
+ * SS
+ * RSP
+ * RFLAGS
+ * CS
+ * RIP <-- RSP points here when we're done
+ * RAX <-- espfix_waddr points here
+ * --- bottom of ESPFIX stack ---
+ */
+
+ pushq %rdi /* Stash user RDI */
SWAPGS
movq PER_CPU_VAR(espfix_waddr), %rdi
- movq %rax, (0*8)(%rdi) /* RAX */
- movq (2*8)(%rsp), %rax /* RIP */
+ movq %rax, (0*8)(%rdi) /* user RAX */
+ movq (1*8)(%rsp), %rax /* user RIP */
movq %rax, (1*8)(%rdi)
- movq (3*8)(%rsp), %rax /* CS */
+ movq (2*8)(%rsp), %rax /* user CS */
movq %rax, (2*8)(%rdi)
- movq (4*8)(%rsp), %rax /* RFLAGS */
+ movq (3*8)(%rsp), %rax /* user RFLAGS */
movq %rax, (3*8)(%rdi)
- movq (6*8)(%rsp), %rax /* SS */
+ movq (5*8)(%rsp), %rax /* user SS */
movq %rax, (5*8)(%rdi)
- movq (5*8)(%rsp), %rax /* RSP */
+ movq (4*8)(%rsp), %rax /* user RSP */
movq %rax, (4*8)(%rdi)
- andl $0xffff0000, %eax
- popq %rdi
+ /* Now RAX == RSP. */
+
+ andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
+ popq %rdi /* Restore user RDI */
+
+ /*
+ * espfix_stack[31:16] == 0. The page tables are set up such that
+ * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
+ * espfix_waddr for any X. That is, there are 65536 RO aliases of
+ * the same page. Set up RSP so that RSP[31:16] contains the
+ * respective 16 bits of the /userspace/ RSP and RSP nonetheless
+ * still points to an RO alias of the ESPFIX stack.
+ */
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
movq %rax, %rsp
- popq %rax
+
+ /*
+ * At this point, we cannot write to the stack any more, but we can
+ * still read.
+ */
+ popq %rax /* Restore user RAX */
+
+ /*
+ * RSP now points to an ordinary IRET frame, except that the page
+ * is read-only and RSP[31:16] are preloaded with the userspace
+ * values. We can now IRET back to userspace.
+ */
jmp native_irq_return_iret
#endif
END(common_interrupt)
@@ -1002,7 +1077,6 @@
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
-.Lerror_entry_from_usermode_swapgs:
/*
* We entered from user mode or we're pretending to have entered
* from user mode due to an IRET fault.
@@ -1045,7 +1119,8 @@
* gsbase and proceed. We'll fix up the exception and land in
* .Lgs_change's error handler with kernel gsbase.
*/
- jmp .Lerror_entry_from_usermode_swapgs
+ SWAPGS
+ jmp .Lerror_entry_done
.Lbstep_iret:
/* Fix truncated RIP */
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 4f74119..3dab75f 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,7 +22,7 @@
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
- if (hdr->e_type != ET_DYN)
+ if (GET_LE(&hdr->e_type) != ET_DYN)
fail("input is not a shared object\n");
/* Walk the segment table. */
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index f840766..23c881c 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -37,54 +37,6 @@
struct linux_binprm;
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset. This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top. This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
-#ifdef CONFIG_X86_32
- return 0;
-#else
- unsigned long addr, end;
- unsigned offset;
-
- /*
- * Round up the start address. It can start out unaligned as a result
- * of stack start randomization.
- */
- start = PAGE_ALIGN(start);
-
- /* Round the lowest possible end address up to a PMD boundary. */
- end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE_MAX)
- end = TASK_SIZE_MAX;
- end -= len;
-
- if (end > start) {
- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
- addr = start + (offset << PAGE_SHIFT);
- } else {
- addr = start;
- }
-
- /*
- * Forcibly align the final address in case we have a hardware
- * issue that requires alignment for performance reasons.
- */
- addr = align_vdso_addr(addr);
-
- return addr;
-#endif
-}
-
static int vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
@@ -176,30 +128,28 @@
return VM_FAULT_SIGBUS;
}
-static int map_vdso(const struct vdso_image *image, bool calculate_addr)
+static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+};
+static const struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+};
+
+/*
+ * Add vdso and vvar mappings to current process.
+ * @image - blob to map
+ * @addr - request a specific address (zero to map at free addr)
+ */
+static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long addr, text_start;
+ unsigned long text_start;
int ret = 0;
- static const struct vm_special_mapping vdso_mapping = {
- .name = "[vdso]",
- .fault = vdso_fault,
- .mremap = vdso_mremap,
- };
- static const struct vm_special_mapping vvar_mapping = {
- .name = "[vvar]",
- .fault = vvar_fault,
- };
-
- if (calculate_addr) {
- addr = vdso_addr(current->mm->start_stack,
- image->size - image->sym_vvar_start);
- } else {
- addr = 0;
- }
-
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
@@ -238,24 +188,104 @@
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
- goto up_fail;
+ do_munmap(mm, text_start, image->size);
}
up_fail:
- if (ret)
+ if (ret) {
current->mm->context.vdso = NULL;
+ current->mm->context.vdso_image = NULL;
+ }
up_write(&mm->mmap_sem);
return ret;
}
+#ifdef CONFIG_X86_64
+/*
+ * Put the vdso above the (randomized) stack with another randomized
+ * offset. This way there is no hole in the middle of address space.
+ * To save memory make sure it is still in the same PTE as the stack
+ * top. This doesn't give that many random bits.
+ *
+ * Note that this algorithm is imperfect: the distribution of the vdso
+ * start address within a PMD is biased toward the end.
+ *
+ * Only used for the 64-bit and x32 vdsos.
+ */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+ unsigned long addr, end;
+ unsigned offset;
+
+ /*
+ * Round up the start address. It can start out unaligned as a result
+ * of stack start randomization.
+ */
+ start = PAGE_ALIGN(start);
+
+ /* Round the lowest possible end address up to a PMD boundary. */
+ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+ if (end >= TASK_SIZE_MAX)
+ end = TASK_SIZE_MAX;
+ end -= len;
+
+ if (end > start) {
+ offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ addr = start + (offset << PAGE_SHIFT);
+ } else {
+ addr = start;
+ }
+
+ /*
+ * Forcibly align the final address in case we have a hardware
+ * issue that requires alignment for performance reasons.
+ */
+ addr = align_vdso_addr(addr);
+
+ return addr;
+}
+
+static int map_vdso_randomized(const struct vdso_image *image)
+{
+ unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
+
+ return map_vdso(image, addr);
+}
+#endif
+
+int map_vdso_once(const struct vdso_image *image, unsigned long addr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ down_write(&mm->mmap_sem);
+ /*
+ * Check if we have already mapped vdso blob - fail to prevent
+ * abusing from userspace install_speciall_mapping, which may
+ * not do accounting and rlimit right.
+ * We could search vma near context.vdso, but it's a slowpath,
+ * so let's explicitely check all VMAs to be completely sure.
+ */
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (vma_is_special_mapping(vma, &vdso_mapping) ||
+ vma_is_special_mapping(vma, &vvar_mapping)) {
+ up_write(&mm->mmap_sem);
+ return -EEXIST;
+ }
+ }
+ up_write(&mm->mmap_sem);
+
+ return map_vdso(image, addr);
+}
+
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
if (vdso32_enabled != 1) /* Other values all mean "disabled" */
return 0;
- return map_vdso(&vdso_image_32, false);
+ return map_vdso(&vdso_image_32, 0);
}
#endif
@@ -265,7 +295,7 @@
if (!vdso64_enabled)
return 0;
- return map_vdso(&vdso_image_64, true);
+ return map_vdso_randomized(&vdso_image_64);
}
#ifdef CONFIG_COMPAT
@@ -276,8 +306,7 @@
if (test_thread_flag(TIF_X32)) {
if (!vdso64_enabled)
return 0;
-
- return map_vdso(&vdso_image_x32, true);
+ return map_vdso_randomized(&vdso_image_x32);
}
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d0efb5c..d31735f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -37,6 +37,7 @@
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
+#include <asm/unwind.h>
#include "perf_event.h"
@@ -1201,6 +1202,9 @@
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole.
+ *
+ * If commit fails, we'll call ->del() on all events
+ * for which ->add() was called.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
@@ -1223,6 +1227,14 @@
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
+ if (x86_pmu.add) {
+ /*
+ * This is before x86_pmu_enable() will call x86_pmu_start(),
+ * so we enable LBRs before an event needs them etc..
+ */
+ x86_pmu.add(event);
+ }
+
ret = 0;
out:
return ret;
@@ -1346,7 +1358,7 @@
event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
/*
- * If we're called during a txn, we don't need to do anything.
+ * If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*
@@ -1354,7 +1366,7 @@
* an event added during that same TXN.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
- return;
+ goto do_del;
/*
* Not a TXN, therefore cleanup properly.
@@ -1384,6 +1396,15 @@
--cpuc->n_events;
perf_event_update_userpage(event);
+
+do_del:
+ if (x86_pmu.del) {
+ /*
+ * This is after x86_pmu_stop(); so we disable LBRs after any
+ * event can need them etc..
+ */
+ x86_pmu.del(event);
+ }
}
int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -2247,39 +2268,26 @@
cyc2ns_read_end(data);
}
-/*
- * callchain support
- */
-
-static int backtrace_stack(void *data, char *name)
-{
- return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
- struct perf_callchain_entry_ctx *entry = data;
-
- return perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
- .stack = backtrace_stack,
- .address = backtrace_address,
- .walk_stack = print_context_stack_bp,
-};
-
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
+ struct unwind_state state;
+ unsigned long addr;
+
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
return;
}
- perf_callchain_store(entry, regs->ip);
+ if (perf_callchain_store(entry, regs->ip))
+ return;
- dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+ for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || perf_callchain_store(entry, addr))
+ return;
+ }
}
static inline int
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index bdcd651..982c9e3 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -455,7 +455,7 @@
* The only surefire way of knowing if this NMI is ours is by checking
* the write ptr against the PMI threshold.
*/
- if (ds->bts_index >= ds->bts_interrupt_threshold)
+ if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
handled = 1;
/*
@@ -584,7 +584,8 @@
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
return -ENODEV;
- bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+ bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
+ PERF_PMU_CAP_EXCLUSIVE;
bts_pmu.task_ctx_nr = perf_sw_context;
bts_pmu.event_init = bts_event_init;
bts_pmu.add = bts_event_add;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 4c9a79b..a3a9eb8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1906,13 +1906,6 @@
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
- /*
- * must disable before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_disable(event);
-
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -1924,6 +1917,14 @@
intel_pmu_pebs_disable(event);
}
+static void intel_pmu_del_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_del(event);
+ if (event->attr.precise_ip)
+ intel_pmu_pebs_del(event);
+}
+
static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1967,12 +1968,6 @@
intel_pmu_enable_bts(hwc->config);
return;
}
- /*
- * must enabled before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1993,6 +1988,14 @@
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
+static void intel_pmu_add_event(struct perf_event *event)
+{
+ if (event->attr.precise_ip)
+ intel_pmu_pebs_add(event);
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_add(event);
+}
+
/*
* Save and restart an expired event. Called by NMI contexts,
* so it has to be careful about preempting normal event ops:
@@ -3291,6 +3294,8 @@
.enable_all = intel_pmu_enable_all,
.enable = intel_pmu_enable_event,
.disable = intel_pmu_disable_event,
+ .add = intel_pmu_add_event,
+ .del = intel_pmu_del_event,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 9b983a4..0319311 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,65 @@
return &emptyconstraint;
}
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
- return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+ return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+ struct debug_store *ds = cpuc->ds;
+ u64 threshold;
+
+ if (cpuc->n_pebs == cpuc->n_large_pebs) {
+ threshold = ds->pebs_absolute_maximum -
+ x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+ } else {
+ threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+ }
+
+ ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+ /*
+ * Make sure we get updated with the first PEBS
+ * event. It will trigger also during removal, but
+ * that does not hurt:
+ */
+ bool update = cpuc->n_pebs == 1;
+
+ if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+ if (!needed_cb)
+ perf_sched_cb_inc(pmu);
+ else
+ perf_sched_cb_dec(pmu);
+
+ update = true;
+ }
+
+ if (update)
+ pebs_update_threshold(cpuc);
+}
+
+void intel_pmu_pebs_add(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+ cpuc->n_pebs++;
+ if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ cpuc->n_large_pebs++;
+
+ pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}
void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +872,9 @@
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
- bool first_pebs;
- u64 threshold;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
- first_pebs = !pebs_is_enabled(cpuc);
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +883,34 @@
cpuc->pebs_enabled |= 1ULL << 63;
/*
- * When the event is constrained enough we can use a larger
- * threshold and run the event with less frequent PMI.
+ * Use auto-reload if possible to save a MSR write in the PMI.
+ * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
*/
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
- threshold = ds->pebs_absolute_maximum -
- x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
- if (first_pebs)
- perf_sched_cb_inc(event->ctx->pmu);
- } else {
- threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
- /*
- * If not all events can use larger buffer,
- * roll back to threshold = 1
- */
- if (!first_pebs &&
- (ds->pebs_interrupt_threshold > threshold))
- perf_sched_cb_dec(event->ctx->pmu);
- }
-
- /* Use auto-reload if possible to save a MSR write in the PMI */
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
ds->pebs_event_reset[hwc->idx] =
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
}
+}
- if (first_pebs || ds->pebs_interrupt_threshold > threshold)
- ds->pebs_interrupt_threshold = threshold;
+void intel_pmu_pebs_del(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+ cpuc->n_pebs--;
+ if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ cpuc->n_large_pebs--;
+
+ pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}
void intel_pmu_pebs_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- struct debug_store *ds = cpuc->ds;
- bool large_pebs = ds->pebs_interrupt_threshold >
- ds->pebs_buffer_base + x86_pmu.pebs_record_size;
- if (large_pebs)
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
intel_pmu_drain_pebs_buffer();
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,9 +920,6 @@
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
- if (large_pebs && !pebs_is_enabled(cpuc))
- perf_sched_cb_dec(event->ctx->pmu);
-
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 707d358..fc6cf21 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,7 +380,6 @@
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
/*
@@ -390,31 +389,21 @@
*/
task_ctx = ctx ? ctx->task_ctx_data : NULL;
if (task_ctx) {
- if (sched_in) {
+ if (sched_in)
__intel_pmu_lbr_restore(task_ctx);
- cpuc->lbr_context = ctx;
- } else {
+ else
__intel_pmu_lbr_save(task_ctx);
- }
return;
}
/*
- * When sampling the branck stack in system-wide, it may be
- * necessary to flush the stack on context switch. This happens
- * when the branch stack does not tag its entries with the pid
- * of the current task. Otherwise it becomes impossible to
- * associate a branch entry with a task. This ambiguity is more
- * likely to appear when the branch stack supports priv level
- * filtering and the user sets it to monitor only at the user
- * level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch
- * stack with branch from multiple tasks.
- */
- if (sched_in) {
+ * Since a context switch can flip the address space and LBR entries
+ * are not tagged with an identifier, we need to wipe the LBR, even for
+ * per-cpu events. You simply cannot resolve the branches from the old
+ * address space.
+ */
+ if (sched_in)
intel_pmu_lbr_reset();
- cpuc->lbr_context = ctx;
- }
}
static inline bool branch_user_callstack(unsigned br_sel)
@@ -422,7 +411,7 @@
return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
@@ -430,27 +419,38 @@
if (!x86_pmu.lbr_nr)
return;
- /*
- * Reset the LBR stack if we changed task context to
- * avoid data leaks.
- */
- if (event->ctx->task && cpuc->lbr_context != event->ctx) {
- intel_pmu_lbr_reset();
- cpuc->lbr_context = event->ctx;
- }
cpuc->br_sel = event->hw.branch_reg.reg;
- if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
- event->ctx->task_ctx_data) {
+ if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
task_ctx = event->ctx->task_ctx_data;
task_ctx->lbr_callstack_users++;
}
- cpuc->lbr_users++;
+ /*
+ * Request pmu::sched_task() callback, which will fire inside the
+ * regular perf event scheduling, so that call will:
+ *
+ * - restore or wipe; when LBR-callstack,
+ * - wipe; otherwise,
+ *
+ * when this is from __perf_event_task_sched_in().
+ *
+ * However, if this is from perf_install_in_context(), no such callback
+ * will follow and we'll need to reset the LBR here if this is the
+ * first LBR event.
+ *
+ * The problem is, we cannot tell these cases apart... but we can
+ * exclude the biggest chunk of cases by looking at
+ * event->total_time_running. An event that has accrued runtime cannot
+ * be 'new'. Conversely, a new event can get installed through the
+ * context switch path for the first time.
+ */
perf_sched_cb_inc(event->ctx->pmu);
+ if (!cpuc->lbr_users++ && !event->total_time_running)
+ intel_pmu_lbr_reset();
}
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
@@ -467,12 +467,6 @@
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
-
- if (cpuc->enabled && !cpuc->lbr_users) {
- __intel_pmu_lbr_disable();
- /* avoid stale pointer */
- cpuc->lbr_context = NULL;
- }
}
void intel_pmu_lbr_enable_all(bool pmi)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 861a7d9..c5047b8 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -69,6 +69,8 @@
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
+ PT_CAP(ptwrite, 0, CR_EBX, BIT(4)),
+ PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
@@ -259,10 +261,16 @@
#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
RTIT_CTL_MTC_RANGE)
+#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \
+ RTIT_CTL_FUP_ON_PTW)
+
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
RTIT_CTL_DISRETC | \
RTIT_CTL_CYC_PSB | \
- RTIT_CTL_MTC)
+ RTIT_CTL_MTC | \
+ RTIT_CTL_PWR_EVT_EN | \
+ RTIT_CTL_FUP_ON_PTW | \
+ RTIT_CTL_PTW_EN)
static bool pt_event_valid(struct perf_event *event)
{
@@ -311,6 +319,20 @@
return false;
}
+ if (config & RTIT_CTL_PWR_EVT_EN &&
+ !pt_cap_get(PT_CAP_power_event_trace))
+ return false;
+
+ if (config & RTIT_CTL_PTW) {
+ if (!pt_cap_get(PT_CAP_ptwrite))
+ return false;
+
+ /* FUPonPTW without PTW doesn't make sense */
+ if ((config & RTIT_CTL_FUP_ON_PTW) &&
+ !(config & RTIT_CTL_PTW_EN))
+ return false;
+ }
+
return true;
}
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index efffa4a..53473c2 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -26,11 +26,14 @@
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
+#define RTIT_CTL_PWR_EVT_EN BIT(4)
+#define RTIT_CTL_FUP_ON_PTW BIT(5)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
+#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
@@ -91,6 +94,8 @@
PT_CAP_psb_cyc,
PT_CAP_ip_filtering,
PT_CAP_mtc,
+ PT_CAP_ptwrite,
+ PT_CAP_power_event_trace,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 2886593..b0f0e83 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -357,6 +357,8 @@
if (event->cpu < 0)
return -EINVAL;
+ event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
/*
* check event is known (determines counter)
*/
@@ -765,6 +767,8 @@
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
+
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 463dc7a..d9844cc 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -664,6 +664,8 @@
event->cpu = box->cpu;
event->pmu_private = box;
+ event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -683,7 +685,8 @@
/* fixed counters have event field hardcoded to zero */
hwc->config = 0ULL;
} else {
- hwc->config = event->attr.config & pmu->type->event_mask;
+ hwc->config = event->attr.config &
+ (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
if (pmu->type->ops->hw_config) {
ret = pmu->type->ops->hw_config(box, event);
if (ret)
@@ -1321,6 +1324,11 @@
.pci_init = skl_uncore_pci_init,
};
+static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
+ .cpu_init = skx_uncore_cpu_init,
+ .pci_init = skx_uncore_pci_init,
+};
+
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
@@ -1343,6 +1351,7 @@
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 78b9c23..ad986c1 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -44,6 +44,7 @@
unsigned perf_ctr;
unsigned event_ctl;
unsigned event_mask;
+ unsigned event_mask_ext;
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
@@ -120,6 +121,7 @@
};
#define UNCORE_BOX_FLAG_INITIATED 0
+#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */
struct uncore_event_desc {
struct kobj_attribute attr;
@@ -172,6 +174,9 @@
static inline
unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
{
+ if (test_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags))
+ return idx * 8 + box->pmu->type->event_ctl;
+
return idx * 4 + box->pmu->type->event_ctl;
}
@@ -377,6 +382,8 @@
void bdx_uncore_cpu_init(void);
int knl_uncore_pci_init(void);
void knl_uncore_cpu_init(void);
+int skx_uncore_pci_init(void);
+void skx_uncore_cpu_init(void);
/* perf_event_intel_uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 9d35ec0..5f845ee 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -388,6 +388,8 @@
event->cpu = box->cpu;
event->pmu_private = box;
+ event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 8aee83b..2724277 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,10 @@
/* SandyBridge-EP/IvyTown uncore support */
#include "uncore.h"
+/* SNB-EP pci bus to socket mapping */
+#define SNBEP_CPUNODEID 0x40
+#define SNBEP_GIDNIDMAP 0x54
+
/* SNB-EP Box level control */
#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
@@ -264,15 +268,72 @@
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* SKX pci bus to socket mapping */
+#define SKX_CPUNODEID 0xc0
+#define SKX_GIDNIDMAP 0xd4
+
+/* SKX CHA */
+#define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_STATE (0x3ffULL << 17)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_REM (0x1ULL << 32)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_LOC (0x1ULL << 33)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC (0x1ULL << 35)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NM (0x1ULL << 36)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM (0x1ULL << 37)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC0 (0x3ffULL << 41)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC1 (0x3ffULL << 51)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
+
+/* SKX IIO */
+#define SKX_IIO0_MSR_PMON_CTL0 0xa48
+#define SKX_IIO0_MSR_PMON_CTR0 0xa41
+#define SKX_IIO0_MSR_PMON_BOX_CTL 0xa40
+#define SKX_IIO_MSR_OFFSET 0x20
+
+#define SKX_PMON_CTL_TRESH_MASK (0xff << 24)
+#define SKX_PMON_CTL_TRESH_MASK_EXT (0xf)
+#define SKX_PMON_CTL_CH_MASK (0xff << 4)
+#define SKX_PMON_CTL_FC_MASK (0x7 << 12)
+#define SKX_IIO_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SKX_PMON_CTL_TRESH_MASK)
+#define SKX_IIO_PMON_RAW_EVENT_MASK_EXT (SKX_PMON_CTL_TRESH_MASK_EXT | \
+ SKX_PMON_CTL_CH_MASK | \
+ SKX_PMON_CTL_FC_MASK)
+
+/* SKX IRP */
+#define SKX_IRP0_MSR_PMON_CTL0 0xa5b
+#define SKX_IRP0_MSR_PMON_CTR0 0xa59
+#define SKX_IRP0_MSR_PMON_BOX_CTL 0xa58
+#define SKX_IRP_MSR_OFFSET 0x20
+
+/* SKX UPI */
+#define SKX_UPI_PCI_PMON_CTL0 0x350
+#define SKX_UPI_PCI_PMON_CTR0 0x318
+#define SKX_UPI_PCI_PMON_BOX_CTL 0x378
+#define SKX_PMON_CTL_UMASK_EXT 0xff
+
+/* SKX M2M */
+#define SKX_M2M_PCI_PMON_CTL0 0x228
+#define SKX_M2M_PCI_PMON_CTR0 0x200
+#define SKX_M2M_PCI_PMON_BOX_CTL 0x258
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35");
DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
@@ -280,6 +341,8 @@
DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
+DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43");
+DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
@@ -288,18 +351,26 @@
DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state5, filter_state, "config1:17-26");
+DEFINE_UNCORE_FORMAT_ATTR(filter_rem, filter_rem, "config1:32");
+DEFINE_UNCORE_FORMAT_ATTR(filter_loc, filter_loc, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nm, filter_nm, "config1:36");
+DEFINE_UNCORE_FORMAT_ATTR(filter_not_nm, filter_not_nm, "config1:37");
DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc_0, filter_opc0, "config1:41-50");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc_1, filter_opc1, "config1:51-60");
DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
@@ -1153,7 +1224,7 @@
/*
* build pci bus to socket mapping
*/
-static int snbep_pci2phy_map_init(int devid)
+static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
{
struct pci_dev *ubox_dev = NULL;
int i, bus, nodeid, segment;
@@ -1168,12 +1239,12 @@
break;
bus = ubox_dev->bus->number;
/* get the Node ID of the local register */
- err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
if (err)
break;
nodeid = config;
/* get the Node ID mapping */
- err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
if (err)
break;
@@ -1207,11 +1278,20 @@
raw_spin_lock(&pci2phy_map_lock);
list_for_each_entry(map, &pci2phy_map_head, list) {
i = -1;
- for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_physid[bus] >= 0)
- i = map->pbus_to_physid[bus];
- else
- map->pbus_to_physid[bus] = i;
+ if (reverse) {
+ for (bus = 255; bus >= 0; bus--) {
+ if (map->pbus_to_physid[bus] >= 0)
+ i = map->pbus_to_physid[bus];
+ else
+ map->pbus_to_physid[bus] = i;
+ }
+ } else {
+ for (bus = 0; bus <= 255; bus++) {
+ if (map->pbus_to_physid[bus] >= 0)
+ i = map->pbus_to_physid[bus];
+ else
+ map->pbus_to_physid[bus] = i;
+ }
}
}
raw_spin_unlock(&pci2phy_map_lock);
@@ -1224,7 +1304,7 @@
int snbep_uncore_pci_init(void)
{
- int ret = snbep_pci2phy_map_init(0x3ce0);
+ int ret = snbep_pci2phy_map_init(0x3ce0, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
if (ret)
return ret;
uncore_pci_uncores = snbep_pci_uncores;
@@ -1788,7 +1868,7 @@
int ivbep_uncore_pci_init(void)
{
- int ret = snbep_pci2phy_map_init(0x0e1e);
+ int ret = snbep_pci2phy_map_init(0x0e1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
if (ret)
return ret;
uncore_pci_uncores = ivbep_pci_uncores;
@@ -2897,7 +2977,7 @@
int hswep_uncore_pci_init(void)
{
- int ret = snbep_pci2phy_map_init(0x2f1e);
+ int ret = snbep_pci2phy_map_init(0x2f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
if (ret)
return ret;
uncore_pci_uncores = hswep_pci_uncores;
@@ -3186,7 +3266,7 @@
int bdx_uncore_pci_init(void)
{
- int ret = snbep_pci2phy_map_init(0x6f1e);
+ int ret = snbep_pci2phy_map_init(0x6f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
if (ret)
return ret;
@@ -3196,3 +3276,525 @@
}
/* end of BDX uncore support */
+
+/* SKX uncore support */
+
+static struct intel_uncore_type skx_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_ubox_format_group,
+};
+
+static struct attribute *skx_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid4.attr,
+ &format_attr_filter_link4.attr,
+ &format_attr_filter_state5.attr,
+ &format_attr_filter_rem.attr,
+ &format_attr_filter_loc.attr,
+ &format_attr_filter_nm.attr,
+ &format_attr_filter_all_op.attr,
+ &format_attr_filter_not_nm.attr,
+ &format_attr_filter_opc_0.attr,
+ &format_attr_filter_opc_1.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_c6.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group skx_uncore_chabox_format_group = {
+ .name = "format",
+ .attrs = skx_uncore_cha_formats_attr,
+};
+
+static struct event_constraint skx_uncore_chabox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg skx_uncore_cha_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4),
+};
+
+static u64 skx_cha_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= SKX_CHA_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK;
+ if (fields & 0x4)
+ mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE;
+ return mask;
+}
+
+static struct event_constraint *
+skx_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, skx_cha_filter_mask);
+}
+
+static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = skx_uncore_cha_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & skx_cha_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops skx_uncore_chabox_ops = {
+ /* There is no frz_en for chabox ctl */
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = skx_cha_hw_config,
+ .get_constraint = skx_cha_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type skx_uncore_chabox = {
+ .name = "cha",
+ .num_counters = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = skx_uncore_chabox_constraints,
+ .ops = &skx_uncore_chabox_ops,
+ .format_group = &skx_uncore_chabox_format_group,
+};
+
+static struct attribute *skx_uncore_iio_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh9.attr,
+ &format_attr_ch_mask.attr,
+ &format_attr_fc_mask.attr,
+ NULL,
+};
+
+static struct attribute_group skx_uncore_iio_format_group = {
+ .name = "format",
+ .attrs = skx_uncore_iio_formats_attr,
+};
+
+static struct event_constraint skx_uncore_iio_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x95, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xd4, 0xc),
+ EVENT_CONSTRAINT_END
+};
+
+static void skx_iio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops skx_uncore_iio_ops = {
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = skx_iio_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SKX_IIO0_MSR_PMON_CTL0,
+ .perf_ctr = SKX_IIO0_MSR_PMON_CTR0,
+ .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .box_ctl = SKX_IIO0_MSR_PMON_BOX_CTL,
+ .msr_offset = SKX_IIO_MSR_OFFSET,
+ .constraints = skx_uncore_iio_constraints,
+ .ops = &skx_uncore_iio_ops,
+ .format_group = &skx_uncore_iio_format_group,
+};
+
+static struct attribute *skx_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group skx_uncore_format_group = {
+ .name = "format",
+ .attrs = skx_uncore_formats_attr,
+};
+
+static struct intel_uncore_type skx_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SKX_IRP0_MSR_PMON_CTL0,
+ .perf_ctr = SKX_IRP0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SKX_IRP0_MSR_PMON_BOX_CTL,
+ .msr_offset = SKX_IRP_MSR_OFFSET,
+ .ops = &skx_uncore_iio_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+static struct intel_uncore_ops skx_uncore_pcu_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = hswep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type skx_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &skx_uncore_pcu_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *skx_msr_uncores[] = {
+ &skx_uncore_ubox,
+ &skx_uncore_chabox,
+ &skx_uncore_iio,
+ &skx_uncore_irp,
+ &skx_uncore_pcu,
+ NULL,
+};
+
+static int skx_count_chabox(void)
+{
+ struct pci_dev *chabox_dev = NULL;
+ int bus, count = 0;
+
+ while (1) {
+ chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev);
+ if (!chabox_dev)
+ break;
+ if (count == 0)
+ bus = chabox_dev->bus->number;
+ if (bus != chabox_dev->bus->number)
+ break;
+ count++;
+ }
+
+ pci_dev_put(chabox_dev);
+ return count;
+}
+
+void skx_uncore_cpu_init(void)
+{
+ skx_uncore_chabox.num_boxes = skx_count_chabox();
+ uncore_msr_uncores = skx_msr_uncores;
+}
+
+static struct intel_uncore_type skx_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = hswep_uncore_imc_events,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_pci_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+static struct attribute *skx_upi_uncore_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask_ext.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group skx_upi_uncore_format_group = {
+ .name = "format",
+ .attrs = skx_upi_uncore_formats_attr,
+};
+
+static void skx_upi_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+ pci_write_config_dword(pdev, SKX_UPI_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static struct intel_uncore_ops skx_upi_uncore_pci_ops = {
+ .init_box = skx_upi_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_upi = {
+ .name = "upi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SKX_UPI_PCI_PMON_CTR0,
+ .event_ctl = SKX_UPI_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SKX_PMON_CTL_UMASK_EXT,
+ .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL,
+ .ops = &skx_upi_uncore_pci_ops,
+ .format_group = &skx_upi_uncore_format_group,
+};
+
+static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+ pci_write_config_dword(pdev, SKX_M2M_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static struct intel_uncore_ops skx_m2m_uncore_pci_ops = {
+ .init_box = skx_m2m_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_m2m = {
+ .name = "m2m",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SKX_M2M_PCI_PMON_CTR0,
+ .event_ctl = SKX_M2M_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SKX_M2M_PCI_PMON_BOX_CTL,
+ .ops = &skx_m2m_uncore_pci_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+static struct event_constraint skx_uncore_m2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type skx_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .constraints = skx_uncore_m2pcie_constraints,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_pci_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+static struct event_constraint skx_uncore_m3upi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x1d, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x1e, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x40, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x4e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x4f, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x50, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x51, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x52, 0x7),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type skx_uncore_m3upi = {
+ .name = "m3upi",
+ .num_counters = 3,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .constraints = skx_uncore_m3upi_constraints,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_pci_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+enum {
+ SKX_PCI_UNCORE_IMC,
+ SKX_PCI_UNCORE_M2M,
+ SKX_PCI_UNCORE_UPI,
+ SKX_PCI_UNCORE_M2PCIE,
+ SKX_PCI_UNCORE_M3UPI,
+};
+
+static struct intel_uncore_type *skx_pci_uncores[] = {
+ [SKX_PCI_UNCORE_IMC] = &skx_uncore_imc,
+ [SKX_PCI_UNCORE_M2M] = &skx_uncore_m2m,
+ [SKX_PCI_UNCORE_UPI] = &skx_uncore_upi,
+ [SKX_PCI_UNCORE_M2PCIE] = &skx_uncore_m2pcie,
+ [SKX_PCI_UNCORE_M3UPI] = &skx_uncore_m3upi,
+ NULL,
+};
+
+static const struct pci_device_id skx_uncore_pci_ids[] = {
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 2, SKX_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 6, SKX_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 2, SKX_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 2, SKX_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 6, SKX_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 2, SKX_PCI_UNCORE_IMC, 5),
+ },
+ { /* M2M0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 0, SKX_PCI_UNCORE_M2M, 0),
+ },
+ { /* M2M1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 0, SKX_PCI_UNCORE_M2M, 1),
+ },
+ { /* UPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, SKX_PCI_UNCORE_UPI, 0),
+ },
+ { /* UPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, SKX_PCI_UNCORE_UPI, 1),
+ },
+ { /* UPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, SKX_PCI_UNCORE_UPI, 2),
+ },
+ { /* M2PCIe 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 1, SKX_PCI_UNCORE_M2PCIE, 0),
+ },
+ { /* M2PCIe 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 1, SKX_PCI_UNCORE_M2PCIE, 1),
+ },
+ { /* M2PCIe 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(23, 1, SKX_PCI_UNCORE_M2PCIE, 2),
+ },
+ { /* M2PCIe 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
+ },
+ { /* M3UPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+ },
+ { /* M3UPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+ },
+ { /* M3UPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+ },
+ { /* end: all zeroes */ }
+};
+
+
+static struct pci_driver skx_uncore_pci_driver = {
+ .name = "skx_uncore",
+ .id_table = skx_uncore_pci_ids,
+};
+
+int skx_uncore_pci_init(void)
+{
+ /* need to double check pci address */
+ int ret = snbep_pci2phy_map_init(0x2014, SKX_CPUNODEID, SKX_GIDNIDMAP, false);
+
+ if (ret)
+ return ret;
+
+ uncore_pci_uncores = skx_pci_uncores;
+ uncore_pci_driver = &skx_uncore_pci_driver;
+ return 0;
+}
+
+/* end of SKX uncore support */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c4a477..5874d8d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -194,12 +194,13 @@
*/
struct debug_store *ds;
u64 pebs_enabled;
+ int n_pebs;
+ int n_large_pebs;
/*
* Intel LBR bits
*/
int lbr_users;
- void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
@@ -508,6 +509,8 @@
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
+ void (*add)(struct perf_event *);
+ void (*del)(struct perf_event *);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -888,6 +891,10 @@
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
void intel_pmu_pebs_enable(struct perf_event *event);
void intel_pmu_pebs_disable(struct perf_event *event);
@@ -906,9 +913,9 @@
void intel_pmu_lbr_reset(void);
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
void intel_pmu_lbr_enable_all(bool pmi);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 2f29f4e..cb13c05 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -378,7 +378,7 @@
put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
} put_user_catch(err);
- err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
+ err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e77a644..1b02038 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -217,10 +217,14 @@
*/
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
+{ \
+ register void *__sp asm(_ASM_SP); \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
- : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input)
+ : output, "+r" (__sp) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2), ## input); \
+}
/*
* use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1243577..f5aaf6c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -650,8 +650,8 @@
static inline void ipi_entering_ack_irq(void)
{
- ack_APIC_irq();
irq_enter();
+ ack_APIC_irq();
}
static inline void exiting_irq(void)
@@ -661,9 +661,8 @@
static inline void exiting_ack_irq(void)
{
- irq_exit();
- /* Ack only at the end to avoid potential reentry */
ack_APIC_irq();
+ irq_exit();
}
extern void ioapic_zap_locks(void);
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 9733361..97848cd 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -158,53 +158,9 @@
* value of "*ptr".
*
* xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
*/
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
-
-#define __add(ptr, inc, lock) \
- ({ \
- __typeof__ (*(ptr)) __ret = (inc); \
- switch (sizeof(*(ptr))) { \
- case __X86_CASE_B: \
- asm volatile (lock "addb %b1, %0\n" \
- : "+m" (*(ptr)) : "qi" (inc) \
- : "memory", "cc"); \
- break; \
- case __X86_CASE_W: \
- asm volatile (lock "addw %w1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
- break; \
- case __X86_CASE_L: \
- asm volatile (lock "addl %1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
- break; \
- case __X86_CASE_Q: \
- asm volatile (lock "addq %1, %0\n" \
- : "+m" (*(ptr)) : "ri" (inc) \
- : "memory", "cc"); \
- break; \
- default: \
- __add_wrong_size(); \
- } \
- __ret; \
- })
-
-/*
- * add_*() adds "inc" to "*ptr"
- *
- * __add() takes a lock prefix
- * add_smp() is locked when multiple CPUs are online
- * add_sync() is always locked
- */
-#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX)
-#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ")
#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
({ \
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index a188061..03d269b 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -275,10 +275,10 @@
#ifdef CONFIG_X86_X32_ABI
typedef struct user_regs_struct compat_elf_gregset_t;
-#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216)
-#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296)
-#define SET_PR_FPVALID(S,V) \
- do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \
+/* Full regset -- prstatus on x32, otherwise on ia32 */
+#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296)
+#define SET_PR_FPVALID(S, V, R) \
+ do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
while (0)
#define COMPAT_USE_64BIT_TIME \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..1188bc8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -106,7 +106,6 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4e10d73..12080d8 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,7 +36,7 @@
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
-extern struct desc_ptr debug_idt_descr;
+extern const struct desc_ptr debug_idt_descr;
extern gate_desc debug_idt_table[];
struct gdt_page {
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 3ab0537..476b574 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -10,8 +10,8 @@
#include <uapi/asm/e820.h>
#ifndef __ASSEMBLY__
/* see comment in arch/x86/kernel/e820.c */
-extern struct e820map e820;
-extern struct e820map e820_saved;
+extern struct e820map *e820;
+extern struct e820map *e820_saved;
extern unsigned long pci_mem_start;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
@@ -53,6 +53,8 @@
extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void);
+extern void e820_reallocate_tables(void);
+
/*
* Returns true iff the specified range [s,e) is completely contained inside
* the ISA region.
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d0bb76d..389d700 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -117,7 +117,6 @@
extern pgd_t * __init efi_call_phys_prolog(void);
extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_print_memmap(void);
-extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
@@ -192,14 +191,7 @@
struct efi_config {
u64 image_handle;
u64 table;
- u64 allocate_pool;
- u64 allocate_pages;
- u64 get_memory_map;
- u64 free_pool;
- u64 free_pages;
- u64 locate_handle;
- u64 handle_protocol;
- u64 exit_boot_services;
+ u64 boot_services;
u64 text_output;
efi_status_t (*call)(unsigned long, ...);
bool is64;
@@ -207,14 +199,27 @@
__pure const struct efi_config *__efi_early(void);
+static inline bool efi_is_64bit(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return false;
+
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return true;
+
+ return __efi_early()->is64;
+}
+
#define efi_call_early(f, ...) \
- __efi_early()->call(__efi_early()->f, __VA_ARGS__);
+ __efi_early()->call(efi_is_64bit() ? \
+ ((efi_boot_services_64_t *)(unsigned long) \
+ __efi_early()->boot_services)->f : \
+ ((efi_boot_services_32_t *)(unsigned long) \
+ __efi_early()->boot_services)->f, __VA_ARGS__)
#define __efi_call_early(f, ...) \
__efi_early()->call((unsigned long)f, __VA_ARGS__);
-#define efi_is_64bit() __efi_early()->is64
-
extern bool efi_reboot_required(void);
#else
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 0e970d0..20a1fbf7 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -19,6 +19,12 @@
# define ia32_setup_rt_frame __setup_rt_frame
#endif
+#ifdef CONFIG_COMPAT
+int __copy_siginfo_to_user32(compat_siginfo_t __user *to,
+ const siginfo_t *from, bool x32_ABI);
+#endif
+
+
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk);
extern void convert_to_fxsr(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index ae55a43..d4957ac 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -45,7 +45,8 @@
extern u64 xfeatures_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
-extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+extern void __init update_regset_xstate_info(unsigned int size,
+ u64 xstate_mask);
void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a4820d4..eccd0ac 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -6,6 +6,7 @@
# define MCOUNT_ADDR ((unsigned long)(__fentry__))
#else
# define MCOUNT_ADDR ((unsigned long)(mcount))
+# define HAVE_FUNCTION_GRAPH_FP_TEST
#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
@@ -13,6 +14,8 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 055ea99..67942b6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -43,6 +43,9 @@
/* X2APIC detection (run once per boot) */
bool (*x2apic_available)(void);
+
+ /* pin current vcpu to specified physical cpu (run rarely) */
+ void (*pin_vcpu)(int);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -56,6 +59,7 @@
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
extern bool hypervisor_x2apic_available(void);
+extern void hypervisor_pin_vcpu(int cpu);
#else
static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
static inline void init_hypervisor_platform(void) { }
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 6277194..9ae5ab8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -56,8 +56,8 @@
#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 9d6b097..5b6753d 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -18,6 +18,8 @@
extern int intel_mid_pci_init(void);
extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern void intel_mid_pwr_power_off(void);
+
#define INTEL_MID_PWR_LSS_OFFSET 4
#define INTEL_MID_PWR_LSS_TYPE (1 << 7)
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 925b605..4fb1d0a 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -3,6 +3,8 @@
#include <linux/notifier.h>
+#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
+
#define IPCMSG_WARM_RESET 0xF0
#define IPCMSG_COLD_RESET 0xF1
#define IPCMSG_SOFT_RESET 0xF2
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 2674ee3..1052a79 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,6 +6,7 @@
#ifdef CONFIG_RANDOMIZE_MEMORY
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
void kernel_randomize_memory(void);
#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 1ef9d58..d318811 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,8 +24,6 @@
extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_trace(struct task_struct *t, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, int all);
extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766e..9bd7ff5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
/* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
@@ -119,6 +121,7 @@
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
@@ -334,44 +337,47 @@
* Scalable MCA.
*/
#ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
- SMCA_F17H_CORE = 0, /* Core errors */
- SMCA_DF, /* Data Fabric */
- SMCA_UMC, /* Unified Memory Controller */
- SMCA_PB, /* Parameter Block */
- SMCA_PSP, /* Platform Security Processor */
- SMCA_SMU, /* System Management Unit */
- N_AMD_IP_TYPES
-};
-struct amd_hwid {
- const char *name;
- unsigned int hwid;
-};
-
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
SMCA_IF, /* Instruction Fetch */
- SMCA_L2_CACHE, /* L2 cache */
- SMCA_DE, /* Decoder unit */
- RES, /* Reserved */
- SMCA_EX, /* Execution unit */
+ SMCA_L2_CACHE, /* L2 Cache */
+ SMCA_DE, /* Decoder Unit */
+ SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
- SMCA_L3_CACHE, /* L3 cache */
- N_CORE_MCA_BLOCKS
+ SMCA_L3_CACHE, /* L3 Cache */
+ SMCA_CS, /* Coherent Slave */
+ SMCA_PIE, /* Power, Interrupts, etc. */
+ SMCA_UMC, /* Unified Memory Controller */
+ SMCA_PB, /* Parameter Block */
+ SMCA_PSP, /* Platform Security Processor */
+ SMCA_SMU, /* System Management Unit */
+ N_SMCA_BANK_TYPES
};
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
-
-enum amd_df_mca_blocks {
- SMCA_CS = 0, /* Coherent Slave */
- SMCA_PIE, /* Power management, Interrupts, etc */
- N_DF_BLOCKS
+struct smca_bank_name {
+ const char *name; /* Short name for sysfs */
+ const char *long_name; /* Long name for pretty-printing */
};
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
+
+struct smca_hwid_mcatype {
+ unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
+ u32 hwid_mcatype; /* (hwid,mcatype) tuple */
+ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
+};
+
+struct smca_bank_info {
+ struct smca_hwid_mcatype *type;
+ u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
#endif
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index b07233b..3200704 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,7 +6,6 @@
#include <asm/x86_init.h>
#include <asm/apicdef.h>
-extern int apic_version[];
extern int pic_mode;
#ifdef CONFIG_X86_32
@@ -40,6 +39,7 @@
extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
extern unsigned int boot_cpu_physical_apicid;
+extern u8 boot_cpu_apic_version;
extern unsigned long mp_lapic_addr;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -86,6 +86,7 @@
#endif
int generic_processor_info(int apicid, int version);
+int __generic_processor_info(int apicid, int version, bool enabled);
#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2970d22..ce93281 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,10 +80,6 @@
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
-static inline unsigned long __read_cr4_safe(void)
-{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
-}
static inline void __write_cr4(unsigned long x)
{
@@ -661,8 +657,6 @@
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
u32 val)
{
@@ -684,22 +678,6 @@
PVOP_VCALL1(pv_lock_ops.kick, cpu);
}
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
- __ticket_t ticket)
-{
- PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
-}
-
-static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
- __ticket_t ticket)
-{
- PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
-}
-
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7fa9e77..0f400c0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -108,7 +108,6 @@
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);
- unsigned long (*read_cr4_safe)(void);
unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);
@@ -301,23 +300,16 @@
struct arch_spinlock;
#ifdef CONFIG_SMP
#include <asm/spinlock_types.h>
-#else
-typedef u16 __ticket_t;
#endif
struct qspinlock;
struct pv_lock_ops {
-#ifdef CONFIG_QUEUED_SPINLOCKS
void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
struct paravirt_callee_save queued_spin_unlock;
void (*wait)(u8 *ptr, u8 val);
void (*kick)(int cpu);
-#else /* !CONFIG_QUEUED_SPINLOCKS */
- struct paravirt_callee_save lock_spinning;
- void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
};
/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6fdef9e..3a26420 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -57,11 +57,13 @@
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#define VMALLOC_SIZE_TB _AC(32, UL)
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define VMEMMAP_START _AC(0xffffea0000000000, UL)
+#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
#ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START vmalloc_base
+#define VMEMMAP_START vmemmap_base
#else
#define VMALLOC_START __VMALLOC_BASE
+#define VMEMMAP_START __VMEMMAP_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 643eba4..2c1ebeb 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -46,10 +46,7 @@
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
- if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
- return memcpy_mcsafe(dst, src, n);
- memcpy(dst, src, n);
- return 0;
+ return memcpy_mcsafe(dst, src, n);
}
/**
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..984a7bf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,9 @@
unsigned short fsindex;
unsigned short gsindex;
#endif
-#ifdef CONFIG_X86_32
- unsigned long ip;
-#endif
+
+ u32 status; /* thread synchronous flags */
+
#ifdef CONFIG_X86_64
unsigned long fsbase;
unsigned long gsbase;
@@ -438,6 +438,15 @@
};
/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+
+/*
* Set IOPL bits in EFLAGS from given mask
*/
static inline void native_set_iopl_mask(unsigned mask)
@@ -724,8 +733,6 @@
.addr_limit = KERNEL_DS, \
}
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
/*
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +783,13 @@
.addr_limit = KERNEL_DS, \
}
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
-
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp);
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index b2988c0..230e190 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -44,9 +44,9 @@
extern struct real_mode_header *real_mode_header;
extern unsigned char real_mode_blob_end[];
-extern unsigned long init_rsp;
extern unsigned long initial_code;
extern unsigned long initial_gs;
+extern unsigned long initial_stack;
extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 8dbc762..3d33a71 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -154,7 +154,7 @@
: "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1),
CC_OUT(e) (result)
: "er" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
+ : "memory");
return result;
}
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index dd1e7d6..8af22be 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -23,6 +23,10 @@
unsigned long sig[_NSIG_WORDS];
} sigset_t;
+/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */
+#define SA_IA32_ABI 0x02000000u
+#define SA_X32_ABI 0x01000000u
+
#ifndef CONFIG_COMPAT
typedef sigset_t compat_sigset_t;
#endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ebd0c16..19980b3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -39,9 +39,6 @@
DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
#endif
-/* Static state in head.S used to set up a CPU */
-extern unsigned long stack_start; /* Initial stack pointer address */
-
struct task_struct;
struct smp_ops {
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 587d791..19a2224 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -59,22 +59,19 @@
static inline unsigned long native_read_cr4(void)
{
unsigned long val;
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
- return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
- unsigned long val;
- /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
- * exists, so it will never fail. */
#ifdef CONFIG_X86_32
+ /*
+ * This could fault if CR4 does not exist. Non-existent CR4
+ * is functionally equivalent to CR4 == 0. Keep it simple and pretend
+ * that CR4 == 0 on CPUs that don't have CR4.
+ */
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
: "=r" (val), "=m" (__force_order) : "0" (0));
#else
- val = native_read_cr4();
+ /* CR4 always exists on x86_64. */
+ asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
#endif
return val;
}
@@ -182,11 +179,6 @@
return native_read_cr4();
}
-static inline unsigned long __read_cr4_safe(void)
-{
- return native_read_cr4_safe();
-}
-
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index be0a059..921bea7 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,187 +20,13 @@
* (the type definitions are in asm/spinlock_types.h)
*/
-#ifdef CONFIG_X86_32
-# define LOCK_PTR_REG "a"
-#else
-# define LOCK_PTR_REG "D"
-#endif
-
-#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))
-/*
- * On PPro SMP, we use a locked operation to unlock
- * (PPro errata 66, 92)
- */
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
-#else
-# define UNLOCK_LOCK_PREFIX
-#endif
-
/* How long a lock should spin before we consider blocking */
#define SPIN_THRESHOLD (1 << 15)
extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key);
-#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>
-#else
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-
-static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
-{
- set_bit(0, (volatile unsigned long *)&lock->tickets.head);
-}
-
-#else /* !CONFIG_PARAVIRT_SPINLOCKS */
-static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
- __ticket_t ticket)
-{
-}
-static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
- __ticket_t ticket)
-{
-}
-
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-static inline int __tickets_equal(__ticket_t one, __ticket_t two)
-{
- return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
-}
-
-static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
- __ticket_t head)
-{
- if (head & TICKET_SLOWPATH_FLAG) {
- arch_spinlock_t old, new;
-
- old.tickets.head = head;
- new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
- old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
- new.tickets.tail = old.tickets.tail;
-
- /* try to clear slowpath flag when there are no contenders */
- cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
- }
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
- return __tickets_equal(lock.tickets.head, lock.tickets.tail);
-}
-
-/*
- * Ticket locks are conceptually two parts, one indicating the current head of
- * the queue, and the other indicating the current tail. The lock is acquired
- * by atomically noting the tail and incrementing it by one (thus adding
- * ourself to the queue and noting our position), then waiting until the head
- * becomes equal to the the initial value of the tail.
- *
- * We use an xadd covering *both* parts of the lock, to increment the tail and
- * also load the position of the head, which takes care of memory ordering
- * issues and should be optimal for the uncontended case. Note the tail must be
- * in the high part, because a wide xadd increment of the low part would carry
- * up and contaminate the high part.
- */
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
-
- inc = xadd(&lock->tickets, inc);
- if (likely(inc.head == inc.tail))
- goto out;
-
- for (;;) {
- unsigned count = SPIN_THRESHOLD;
-
- do {
- inc.head = READ_ONCE(lock->tickets.head);
- if (__tickets_equal(inc.head, inc.tail))
- goto clear_slowpath;
- cpu_relax();
- } while (--count);
- __ticket_lock_spinning(lock, inc.tail);
- }
-clear_slowpath:
- __ticket_check_and_clear_slowpath(lock, inc.head);
-out:
- barrier(); /* make sure nothing creeps before the lock is taken */
-}
-
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- arch_spinlock_t old, new;
-
- old.tickets = READ_ONCE(lock->tickets);
- if (!__tickets_equal(old.tickets.head, old.tickets.tail))
- return 0;
-
- new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
- new.head_tail &= ~TICKET_SLOWPATH_FLAG;
-
- /* cmpxchg is a full barrier, so nothing can move before it */
- return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- if (TICKET_SLOWPATH_FLAG &&
- static_key_false(¶virt_ticketlocks_enabled)) {
- __ticket_t head;
-
- BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
- head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
-
- if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
- head &= ~TICKET_SLOWPATH_FLAG;
- __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
- }
- } else
- __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
- struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
- return !__tickets_equal(tmp.tail, tmp.head);
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
- struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
- tmp.head &= ~TICKET_SLOWPATH_FLAG;
- return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
-}
-#define arch_spin_is_contended arch_spin_is_contended
-
-static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
- unsigned long flags)
-{
- arch_spin_lock(lock);
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- __ticket_t head = READ_ONCE(lock->tickets.head);
-
- for (;;) {
- struct __raw_tickets tmp = READ_ONCE(lock->tickets);
- /*
- * We need to check "unlocked" in a loop, tmp.head == head
- * can be false positive because of overflow.
- */
- if (__tickets_equal(tmp.head, tmp.tail) ||
- !__tickets_equal(tmp.head, head))
- break;
-
- cpu_relax();
- }
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 65c3e37..25311eb 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,20 +23,7 @@
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm-generic/qspinlock_types.h>
-#else
-typedef struct arch_spinlock {
- union {
- __ticketpair_t head_tail;
- struct __raw_tickets {
- __ticket_t head, tail;
- } tickets;
- };
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
-#endif /* CONFIG_QUEUED_SPINLOCKS */
#include <asm-generic/qrwlock_types.h>
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 0944218..37f2e0b 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,86 +8,86 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+ STACK_TYPE_UNKNOWN,
+ STACK_TYPE_TASK,
+ STACK_TYPE_IRQ,
+ STACK_TYPE_SOFTIRQ,
+ STACK_TYPE_EXCEPTION,
+ STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+};
+
+struct stack_info {
+ enum stack_type type;
+ unsigned long *begin, *end, *next_sp;
+};
+
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info);
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask);
+
+void stack_type_str(enum stack_type type, const char **begin,
+ const char **end);
+
+static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
+{
+ void *begin = info->begin;
+ void *end = info->end;
+
+ return (info->type != STACK_TYPE_UNKNOWN &&
+ addr >= begin && addr < end &&
+ addr + len > begin && addr + len <= end);
+}
extern int kstack_depth_to_print;
-struct thread_info;
-struct stacktrace_ops;
-
-typedef unsigned long (*walk_stack_t)(struct task_struct *task,
- unsigned long *stack,
- unsigned long bp,
- const struct stacktrace_ops *ops,
- void *data,
- unsigned long *end,
- int *graph);
-
-extern unsigned long
-print_context_stack(struct task_struct *task,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end, int *graph);
-
-extern unsigned long
-print_context_stack_bp(struct task_struct *task,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end, int *graph);
-
-/* Generic stack tracer with callbacks */
-
-struct stacktrace_ops {
- int (*address)(void *data, unsigned long address, int reliable);
- /* On negative return stop dumping */
- int (*stack)(void *data, char *name);
- walk_stack_t walk_stack;
-};
-
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data);
-
#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
#else
#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
#endif
#ifdef CONFIG_FRAME_POINTER
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
- unsigned long bp;
-
if (regs)
- return regs->bp;
+ return (unsigned long *)regs->bp;
- if (task == current) {
- /* Grab bp right from our regs */
- get_bp(bp);
- return bp;
- }
+ if (task == current)
+ return __builtin_frame_address(0);
- /* bp is the last reg pushed by switch_to */
- return *(unsigned long *)task->thread.sp;
+ return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
}
#else
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
- return 0;
+ return NULL;
}
-#endif
+#endif /* CONFIG_FRAME_POINTER */
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl);
+static inline unsigned long *
+get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
+{
+ if (regs)
+ return (unsigned long *)kernel_stack_pointer(regs);
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl);
+ if (task == current)
+ return __builtin_frame_address(0);
+
+ return (unsigned long *)task->thread.sp;
+}
+
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, char *log_lvl);
+
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, char *log_lvl);
extern unsigned int code_bytes;
@@ -106,7 +106,7 @@
{
struct stack_frame *frame;
- get_bp(frame);
+ frame = __builtin_frame_address(0);
#ifdef CONFIG_FRAME_POINTER
frame = frame->next_frame;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 90dbbd9..a164862 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -2,6 +2,7 @@
#define _ASM_X86_STRING_64_H
#ifdef __KERNEL__
+#include <linux/jump_label.h>
/* Written 2002 by Andi Kleen */
@@ -78,6 +79,9 @@
#define memset(s, c, n) __memset(s, c, n)
#endif
+__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
@@ -86,10 +90,23 @@
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
+ * We only call into the "safe" function on systems that can
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
*
* Return 0 for success, -EFAULT for fail
*/
-int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+static __always_inline __must_check int
+memcpy_mcsafe(void *dst, const void *src, size_t cnt)
+{
+#ifdef CONFIG_X86_MCE
+ if (static_branch_unlikely(&mcsafe_key))
+ return memcpy_mcsafe_unrolled(dst, src, cnt);
+ else
+#endif
+ memcpy(dst, src, cnt);
+ return 0;
+}
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1..5cb436a 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,66 @@
#define _ASM_X86_SWITCH_TO_H
struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+ struct task_struct *next);
+
__visible struct task_struct *__switch_to(struct task_struct *prev,
- struct task_struct *next);
+ struct task_struct *next);
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
-#ifdef CONFIG_X86_32
+/* This runs runs on the previous thread's stack. */
+static inline void prepare_switch_to(struct task_struct *prev,
+ struct task_struct *next)
+{
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * If we switch to a stack that has a top-level paging entry
+ * that is not present in the current mm, the resulting #PF will
+ * will be promoted to a double-fault and we'll panic. Probe
+ * the new stack now so that vmalloc_fault can fix up the page
+ * tables if needed. This can only happen if we use a stack
+ * in vmap space.
+ *
+ * We assume that the stack is aligned so that it never spans
+ * more than one top-level paging entry.
+ *
+ * To minimize cache pollution, just follow the stack pointer.
+ */
+ READ_ONCE(*(unsigned char *)next->thread.sp);
+#endif
+}
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movl %P[task_canary](%[next]), %%ebx\n\t" \
- "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam \
- , [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
+asmlinkage void ret_from_fork(void);
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+#else
+ unsigned long si;
+ unsigned long di;
+#endif
+ unsigned long bx;
+ unsigned long bp;
+ unsigned long ret_addr;
+};
+
+struct fork_frame {
+ struct inactive_task_frame frame;
+ struct pt_regs regs;
+};
+
#define switch_to(prev, next, last) \
do { \
- /* \
- * Context-switching clobbers all registers, so we clobber \
- * them explicitly, via unused output variables. \
- * (EAX and EBP is not listed because EBP is saved/restored \
- * explicitly for wchan access and EAX is the return value of \
- * __switch_to()) \
- */ \
- unsigned long ebx, ecx, edx, esi, edi; \
+ prepare_switch_to(prev, next); \
\
- asm volatile("pushl %%ebp\n\t" /* save EBP */ \
- "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
- "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
- "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
- "pushl %[next_ip]\n\t" /* restore EIP */ \
- __switch_canary \
- "jmp __switch_to\n" /* regparm call */ \
- "1:\t" \
- "popl %%ebp\n\t" /* restore EBP */ \
- \
- /* output parameters */ \
- : [prev_sp] "=m" (prev->thread.sp), \
- [prev_ip] "=m" (prev->thread.ip), \
- "=a" (last), \
- \
- /* clobbered output registers: */ \
- "=b" (ebx), "=c" (ecx), "=d" (edx), \
- "=S" (esi), "=D" (edi) \
- \
- __switch_canary_oparam \
- \
- /* input parameters: */ \
- : [next_sp] "m" (next->thread.sp), \
- [next_ip] "m" (next->thread.ip), \
- \
- /* regparm parameters for __switch_to(): */ \
- [prev] "a" (prev), \
- [next] "d" (next) \
- \
- __switch_canary_iparam \
- \
- : /* reloaded segment registers */ \
- "memory"); \
+ ((last) = __switch_to_asm((prev), (next))); \
} while (0)
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER \
- , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
- "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movq %P[task_canary](%%rsi),%%r8\n\t" \
- "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam \
- , [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
- asm volatile(SAVE_CONTEXT \
- "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
- "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
- "call __switch_to\n\t" \
- "movq "__percpu_arg([current_task])",%%rsi\n\t" \
- __switch_canary \
- "movq %P[thread_info](%%rsi),%%r8\n\t" \
- "movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
- "jnz ret_from_fork\n\t" \
- RESTORE_CONTEXT \
- : "=a" (last) \
- __switch_canary_oparam \
- : [next] "S" (next), [prev] "D" (prev), \
- [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
- [ti_flags] "i" (offsetof(struct thread_info, flags)), \
- [_tif_fork] "i" (_TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (current_task) \
- __switch_canary_iparam \
- : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e23dd1..e3c95e8 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -116,7 +116,7 @@
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task->thread.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
@@ -177,7 +177,7 @@
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task->thread.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
@@ -234,18 +234,8 @@
static inline int syscall_get_arch(void)
{
-#ifdef CONFIG_IA32_EMULATION
- /*
- * TS_COMPAT is set for 32-bit syscall entry and then
- * remains set until we return to user mode.
- *
- * x32 tasks should be considered AUDIT_ARCH_X86_64.
- */
- if (task_thread_info(current)->status & TS_COMPAT)
- return AUDIT_ARCH_I386;
-#endif
- /* Both x32 and x86_64 are considered "64-bit". */
- return AUDIT_ARCH_X86_64;
+ /* x32 tasks should be considered AUDIT_ARCH_X86_64. */
+ return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..2aaca53 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,21 +52,6 @@
#include <asm/cpufeature.h>
#include <linux/atomic.h>
-struct thread_info {
- struct task_struct *task; /* main task structure */
- __u32 flags; /* low level flags */
- __u32 status; /* thread synchronous flags */
- __u32 cpu; /* current CPU */
-};
-
-#define INIT_THREAD_INFO(tsk) \
-{ \
- .task = &tsk, \
- .flags = 0, \
- .cpu = 0, \
-}
-
-#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
#else /* !__ASSEMBLY__ */
@@ -95,7 +80,6 @@
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
-#define TIF_FORK 18 /* ret_from_fork */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +103,6 @@
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
-#define _TIF_FORK (1 << TIF_FORK)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
@@ -160,11 +143,6 @@
*/
#ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
- return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
-}
-
static inline unsigned long current_stack_pointer(void)
{
unsigned long sp;
@@ -226,60 +204,19 @@
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
#endif
-/*
- * ASM operand which evaluates to a 'thread_info' address of
- * the current task, if it is known that "reg" is exactly "off"
- * bytes below the top of the stack currently.
- *
- * ( The kernel stack's size is known at build time, it is usually
- * 2 or 4 pages, and the bottom of the kernel stack contains
- * the thread_info structure. So to access the thread_info very
- * quickly from assembly code we can calculate down from the
- * top of the kernel stack to the bottom, using constant,
- * build-time calculations only. )
- *
- * For example, to fetch the current thread_info->flags value into %eax
- * on x86-64 defconfig kernels, in syscall entry code where RSP is
- * currently at exactly SIZEOF_PTREGS bytes away from the top of the
- * stack:
- *
- * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
- *
- * will translate to:
- *
- * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
- *
- * which is below the current RSP by almost 16K.
- */
-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
-
#endif
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
#endif
-
#ifndef __ASSEMBLY__
-static inline bool in_ia32_syscall(void)
-{
#ifdef CONFIG_X86_32
- return true;
+#define in_ia32_syscall() true
+#else
+#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+ current->thread.status & TS_COMPAT)
#endif
-#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->status & TS_COMPAT)
- return true;
-#endif
- return false;
-}
/*
* Force syscall return via IRET by making it look as if there was
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c3496619..01fd0a7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -117,6 +117,12 @@
extern void ist_begin_non_atomic(struct pt_regs *regs);
extern void ist_end_non_atomic(void);
+#ifdef CONFIG_VMAP_STACK
+void __noreturn handle_stack_overflow(const char *message,
+ struct pt_regs *regs,
+ unsigned long fault_address);
+#endif
+
/* Interrupts/Exceptions */
enum {
X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
new file mode 100644
index 0000000..c4b6d1c
--- /dev/null
+++ b/arch/x86/include/asm/unwind.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_X86_UNWIND_H
+#define _ASM_X86_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+struct unwind_state {
+ struct stack_info stack_info;
+ unsigned long stack_mask;
+ struct task_struct *task;
+ int graph_idx;
+#ifdef CONFIG_FRAME_POINTER
+ unsigned long *bp;
+#else
+ unsigned long *sp;
+#endif
+};
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame);
+
+bool unwind_next_frame(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+ return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame)
+{
+ first_frame = first_frame ? : get_stack_pointer(task, regs);
+
+ __unwind_start(state, task, regs, first_frame);
+}
+
+#ifdef CONFIG_FRAME_POINTER
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return NULL;
+
+ return state->bp + 1;
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+#else /* !CONFIG_FRAME_POINTER */
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+ return NULL;
+}
+
+static inline
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+
+ return ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ *state->sp, state->sp);
+}
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index cc44d92..57ab86d 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -49,14 +49,12 @@
#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \
UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
#define UV_DESC_PSHIFT 49
-#define UV_PAYLOADQ_PNODE_SHIFT 49
+#define UV_PAYLOADQ_GNODE_SHIFT 49
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
#define UV_BAU_BASENAME "sgi_uv/bau_tunables"
#define UV_BAU_TUNABLES_DIR "sgi_uv"
#define UV_BAU_TUNABLES_FILE "bau_tunables"
#define WHITESPACE " \t\n"
-#define uv_mmask ((1UL << uv_hub_info->m_val) - 1)
-#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
@@ -387,6 +385,17 @@
/* bits 127:120 */
};
+/* Abstracted BAU functions */
+struct bau_operations {
+ unsigned long (*read_l_sw_ack)(void);
+ unsigned long (*read_g_sw_ack)(int pnode);
+ unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
+ void (*write_l_sw_ack)(unsigned long mmr);
+ void (*write_g_sw_ack)(int pnode, unsigned long mmr);
+ void (*write_payload_first)(int pnode, unsigned long mmr);
+ void (*write_payload_last)(int pnode, unsigned long mmr);
+};
+
/*
* The activation descriptor:
* The format of the message to send, plus all accompanying control
@@ -655,6 +664,16 @@
write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
}
+static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image)
+{
+ write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image);
+}
+
+static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image)
+{
+ write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image);
+}
+
static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
{
write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
@@ -700,6 +719,26 @@
return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
}
+static inline void write_mmr_proc_sw_ack(unsigned long mr)
+{
+ uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
+}
+
+static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr)
+{
+ write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
+}
+
+static inline unsigned long read_mmr_proc_sw_ack(void)
+{
+ return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
+}
+
+static inline unsigned long read_gmmr_proc_sw_ack(int pnode)
+{
+ return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
+}
+
static inline void write_mmr_data_config(int pnode, unsigned long mr)
{
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 43dc55b..2444189 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -41,6 +41,8 @@
extern void __init init_vdso_image(const struct vdso_image *image);
+extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
+
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943..69a6e07 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,8 @@
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..ae135de 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# define ARCH_MAP_VDSO_X32 0x2001
+# define ARCH_MAP_VDSO_32 0x2002
+# define ARCH_MAP_VDSO_64 0x2003
+#endif
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5b..45257cf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
+ifdef CONFIG_FRAME_POINTER
+obj-y += unwind_frame.o
+else
+obj-y += unwind_guess.o
+endif
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 3242e59..26b78d8 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
obj-$(CONFIG_ACPI_APEI) += apei.o
+obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_msr.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 90d84c3..32a7d70 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -176,15 +176,10 @@
return -EINVAL;
}
- if (!enabled) {
- ++disabled_cpus;
- return -EINVAL;
- }
-
if (boot_cpu_physical_apicid != -1U)
- ver = apic_version[boot_cpu_physical_apicid];
+ ver = boot_cpu_apic_version;
- cpu = generic_processor_info(id, ver);
+ cpu = __generic_processor_info(id, ver, enabled);
if (cpu >= 0)
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
@@ -282,6 +277,8 @@
if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
return -EINVAL;
+ acpi_table_print_madt_entry(header);
+
acpi_lapic_addr = lapic_addr_ovr->address;
return 0;
@@ -705,7 +702,7 @@
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
@@ -716,6 +713,7 @@
numa_set_node(cpu, nid);
}
#endif
+ return 0;
}
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
@@ -998,21 +996,6 @@
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
- /*
- * Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
- */
-
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
- acpi_parse_lapic_addr_ovr, 0);
- if (count < 0) {
- printk(KERN_ERR PREFIX
- "Error parsing LAPIC address override entry\n");
- return count;
- }
-
- register_lapic_address(acpi_lapic_addr);
-
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
acpi_parse_sapic, MAX_LOCAL_APIC);
@@ -1031,8 +1014,8 @@
return ret;
}
- x2count = madt_proc[0].count;
- count = madt_proc[1].count;
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1513,7 +1496,7 @@
* If acpi_disabled, bail out
*/
if (acpi_disabled)
- return;
+ return;
/*
* Initialize the ACPI boot-time table parser.
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c
new file mode 100644
index 0000000..6fb478b
--- /dev/null
+++ b/arch/x86/kernel/acpi/cppc_msr.c
@@ -0,0 +1,58 @@
+/*
+ * cppc_msr.c: MSR Interface for CPPC
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <acpi/cppc_acpi.h>
+#include <asm/msr.h>
+
+/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+
+bool cpc_ffh_supported(void)
+{
+ return true;
+}
+
+int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+ int err;
+
+ err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
+ if (!err) {
+ u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+
+ *val &= mask;
+ *val >>= reg->bit_offset;
+ }
+ return err;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+ u64 rd_val;
+ int err;
+
+ err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
+ if (!err) {
+ u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+
+ val <<= reg->bit_offset;
+ val &= mask;
+ rd_val &= ~mask;
+ rd_val |= val;
+ err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
+ }
+ return err;
+}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf..4858733 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
- stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+ initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_table(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f3e9b2d..f266b8a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -64,6 +64,8 @@
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
+u8 boot_cpu_apic_version;
+
/*
* The highest APIC ID seen during enumeration.
*/
@@ -1374,7 +1376,6 @@
* Actually disabling the focus CPU check just makes the hang less
* frequent as it makes the interrupt distributon model be more
* like LRU than MRU (the short-term load is more even across CPUs).
- * See also the comment in end_level_ioapic_irq(). --macro
*/
/*
@@ -1816,8 +1817,7 @@
* since smp_sanity_check is prepared for such a case
* and disable smp mode
*/
- apic_version[new_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
+ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
}
@@ -1828,17 +1828,14 @@
if (!x2apic_mode) {
set_fixmap_nocache(FIX_APIC_BASE, address);
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
- APIC_BASE, mp_lapic_addr);
+ APIC_BASE, address);
}
if (boot_cpu_physical_apicid == -1U) {
boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
+ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
}
-int apic_version[MAX_LOCAL_APIC];
-
/*
* Local APIC interrupts
*/
@@ -2027,7 +2024,53 @@
apic_write(APIC_LVT1, value);
}
-int generic_processor_info(int apicid, int version)
+/*
+ * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
+ * contiguously, it equals to current allocated max logical CPU ID plus 1.
+ * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of
+ * nr_logical_cpuids is nr_cpu_ids.
+ *
+ * NOTE: Reserve 0 for BSP.
+ */
+static int nr_logical_cpuids = 1;
+
+/*
+ * Used to store mapping between logical CPU IDs and APIC IDs.
+ */
+static int cpuid_to_apicid[] = {
+ [0 ... NR_CPUS - 1] = -1,
+};
+
+/*
+ * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
+ * and cpuid_to_apicid[] synchronized.
+ */
+static int allocate_logical_cpuid(int apicid)
+{
+ int i;
+
+ /*
+ * cpuid <-> apicid mapping is persistent, so when a cpu is up,
+ * check if the kernel has allocated a cpuid for it.
+ */
+ for (i = 0; i < nr_logical_cpuids; i++) {
+ if (cpuid_to_apicid[i] == apicid)
+ return i;
+ }
+
+ /* Allocate a new cpuid. */
+ if (nr_logical_cpuids >= nr_cpu_ids) {
+ WARN_ONCE(1, "Only %d processors supported."
+ "Processor %d/0x%x and the rest are ignored.\n",
+ nr_cpu_ids - 1, nr_logical_cpuids, apicid);
+ return -1;
+ }
+
+ cpuid_to_apicid[nr_logical_cpuids] = apicid;
+ return nr_logical_cpuids++;
+}
+
+int __generic_processor_info(int apicid, int version, bool enabled)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2102,8 +2145,16 @@
* for BSP.
*/
cpu = 0;
- } else
- cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /* Logical cpuid 0 is reserved for BSP. */
+ cpuid_to_apicid[0] = apicid;
+ } else {
+ cpu = allocate_logical_cpuid(apicid);
+ if (cpu < 0) {
+ disabled_cpus++;
+ return -EINVAL;
+ }
+ }
/*
* This can happen on physical hotplug. The sanity check at boot time
@@ -2120,8 +2171,6 @@
return -ENOSPC;
}
- num_processors++;
-
/*
* Validate version
*/
@@ -2130,14 +2179,12 @@
cpu, apicid);
version = 0x10;
}
- apic_version[apicid] = version;
- if (version != apic_version[boot_cpu_physical_apicid]) {
+ if (version != boot_cpu_apic_version) {
pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
+ boot_cpu_apic_version, cpu, version);
}
- physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
@@ -2150,11 +2197,23 @@
apic->x86_32_early_logical_apicid(cpu);
#endif
set_cpu_possible(cpu, true);
- set_cpu_present(cpu, true);
+
+ if (enabled) {
+ num_processors++;
+ physid_set(apicid, phys_cpu_present_map);
+ set_cpu_present(cpu, true);
+ } else {
+ disabled_cpus++;
+ }
return cpu;
}
+int generic_processor_info(int apicid, int version)
+{
+ return __generic_processor_info(apicid, version, true);
+}
+
int hard_smp_processor_id(void)
{
return read_apic_id();
@@ -2277,7 +2336,7 @@
* Complain if the BIOS pretends there is one.
*/
if (!boot_cpu_has(X86_FEATURE_APIC) &&
- APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ APIC_INTEGRATED(boot_cpu_apic_version)) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
return -1;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae10..a4d7ff2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
static struct apic apic_physflat;
static struct apic apic_flat;
-struct apic __read_mostly *apic = &apic_flat;
+struct apic *apic __ro_after_init = &apic_flat;
EXPORT_SYMBOL_GPL(apic);
static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -116,27 +116,17 @@
static unsigned int flat_get_apic_id(unsigned long x)
{
- unsigned int id;
-
- id = (((x)>>24) & 0xFFu);
-
- return id;
+ return (x >> 24) & 0xFF;
}
static unsigned long set_apic_id(unsigned int id)
{
- unsigned long x;
-
- x = ((id & 0xFFu)<<24);
- return x;
+ return (id & 0xFF) << 24;
}
static unsigned int read_xapic_id(void)
{
- unsigned int id;
-
- id = flat_get_apic_id(apic_read(APIC_ID));
- return id;
+ return flat_get_apic_id(apic_read(APIC_ID));
}
static int flat_apic_id_registered(void)
@@ -154,7 +144,7 @@
return 1;
}
-static struct apic apic_flat = {
+static struct apic apic_flat __ro_after_init = {
.name = "flat",
.probe = flat_probe,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
@@ -248,7 +238,7 @@
return 0;
}
-static struct apic apic_physflat = {
+static struct apic apic_physflat __ro_after_init = {
.name = "physical flat",
.probe = physflat_probe,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b..b109e43 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
.name = "noop",
.probe = noop_probe,
.acpi_madt_oem_check = NULL,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 714d4fd..e08fe2c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -40,10 +40,7 @@
static unsigned long numachip1_set_apic_id(unsigned int id)
{
- unsigned long x;
-
- x = ((id & 0xffU) << 24);
- return x;
+ return (id & 0xff) << 24;
}
static unsigned int numachip2_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa4..5601201 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@
return dmi_bigsmp;
}
-static struct apic apic_bigsmp = {
+static struct apic apic_bigsmp __ro_after_init = {
.name = "bigsmp",
.probe = probe_bigsmp,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7491f41..48e6d84 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1593,7 +1593,7 @@
* no meaning without the serial APIC bus.
*/
if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ || APIC_XAPIC(boot_cpu_apic_version))
return;
setup_ioapic_ids_from_mpc_nocheck();
}
@@ -2423,7 +2423,7 @@
static u8 io_apic_unique_id(int idx, u8 id)
{
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ !APIC_XAPIC(boot_cpu_apic_version))
return io_apic_get_unique_id(idx, id);
else
return id;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade2532..015bbf3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@
hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
}
-static struct irq_chip hpet_msi_controller = {
+static struct irq_chip hpet_msi_controller __ro_after_init = {
.name = "HPET-MSI",
.irq_unmask = hpet_msi_unmask,
.irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7c43e71..c48264e 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@
return 1;
}
-static struct apic apic_default = {
+static struct apic apic_default __ro_after_init = {
.name = "default",
.probe = probe_default,
@@ -126,7 +126,7 @@
apic_driver(apic_default);
-struct apic *apic = &apic_default;
+struct apic *apic __ro_after_init = &apic_default;
EXPORT_SYMBOL_GPL(apic);
static int cmdline_apic __initdata;
@@ -152,7 +152,7 @@
void __init default_setup_apic_routing(void)
{
- int version = apic_version[boot_cpu_physical_apicid];
+ int version = boot_cpu_apic_version;
if (num_possible_cpus() > 8) {
switch (boot_cpu_data.x86_vendor) {
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 54f35d9..200af5a 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,7 @@
cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
}
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __ro_after_init = {
.name = "cluster x2apic",
.probe = x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f..ff111f0 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@
return apic == &apic_x2apic_phys;
}
-static struct apic apic_x2apic_phys = {
+static struct apic apic_x2apic_phys __ro_after_init = {
.name = "physical x2apic",
.probe = x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cb0673c..aeef53c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -533,11 +533,8 @@
static unsigned long set_apic_id(unsigned int id)
{
- unsigned long x;
-
- /* maskout x2apic_extra_bits ? */
- x = id;
- return x;
+ /* CHECKME: Do we need to mask out the xapic extra bits? */
+ return id;
}
static unsigned int uv_read_apic_id(void)
@@ -560,7 +557,7 @@
return apic == &apic_x2apic_uv_x;
}
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __ro_after_init = {
.name = "UV large system",
.probe = uv_probe,
@@ -927,7 +924,7 @@
mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
}
-static void uv_heartbeat_enable(int cpu)
+static int uv_heartbeat_enable(unsigned int cpu)
{
while (!uv_cpu_scir_info(cpu)->enabled) {
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
@@ -941,43 +938,24 @@
/* also ensure that boot cpu is enabled */
cpu = 0;
}
+ return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
-static void uv_heartbeat_disable(int cpu)
+static int uv_heartbeat_disable(unsigned int cpu)
{
if (uv_cpu_scir_info(cpu)->enabled) {
uv_cpu_scir_info(cpu)->enabled = 0;
del_timer(&uv_cpu_scir_info(cpu)->timer);
}
uv_set_cpu_scir_bits(cpu, 0xff);
-}
-
-/*
- * cpu hotplug notifier
- */
-static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
-{
- long cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- uv_heartbeat_enable(cpu);
- break;
- case CPU_DOWN_PREPARE:
- uv_heartbeat_disable(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ return 0;
}
static __init void uv_scir_register_cpu_notifier(void)
{
- hotcpu_notifier(uv_scir_cpu_notify, 0);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online",
+ uv_heartbeat_enable, uv_heartbeat_disable);
}
#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6f..c62e015 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
void common(void) {
BLANK();
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
+ OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+ OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
BLANK();
+ OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d2..880aa09 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@
/* Size of SYSENTER_stack */
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+#ifdef CONFIG_CC_STACKPROTECTOR
+ BLANK();
+ OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97..210927e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK();
+#ifdef CONFIG_CC_STACKPROTECTOR
+ DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+ BLANK();
+#endif
+
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 809eda0..9bd910a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -804,21 +804,20 @@
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
- if (!have_cpuid_p())
- return;
+ if (have_cpuid_p()) {
+ cpu_detect(c);
+ get_cpu_vendor(c);
+ get_cpu_cap(c);
- cpu_detect(c);
- get_cpu_vendor(c);
- get_cpu_cap(c);
+ if (this_cpu->c_early_init)
+ this_cpu->c_early_init(c);
- if (this_cpu->c_early_init)
- this_cpu->c_early_init(c);
+ c->cpu_index = 0;
+ filter_cpuid_features(c, false);
- c->cpu_index = 0;
- filter_cpuid_features(c, false);
-
- if (this_cpu->c_bsp_init)
- this_cpu->c_bsp_init(c);
+ if (this_cpu->c_bsp_init)
+ this_cpu->c_bsp_init(c);
+ }
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
fpu__init_system(c);
@@ -1265,9 +1264,14 @@
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
- (unsigned long) debug_idt_table };
+struct desc_ptr idt_descr __ro_after_init = {
+ .size = NR_VECTORS * 16 - 1,
+ .address = (unsigned long) idt_table,
+};
+const struct desc_ptr debug_idt_descr = {
+ .size = NR_VECTORS * 16 - 1,
+ .address = (unsigned long) debug_idt_table,
+};
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1281,7 +1285,7 @@
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
@@ -1305,11 +1309,6 @@
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
- /*
- * LSTAR and STAR live in a bit strange symbiosis.
- * They both write to the same internal register. STAR allows to
- * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
- */
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 27e4665..35691a6 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -86,3 +86,14 @@
x86_hyper->x2apic_available &&
x86_hyper->x2apic_available();
}
+
+void hypervisor_pin_vcpu(int cpu)
+{
+ if (!x86_hyper)
+ return;
+
+ if (x86_hyper->pin_vcpu)
+ x86_hyper->pin_vcpu(cpu);
+ else
+ WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec8..a7fdf45 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
+#include <linux/jump_label.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -292,6 +293,13 @@
if (m->misc)
pr_cont("MISC %llx ", m->misc);
+ if (mce_flags.smca) {
+ if (m->synd)
+ pr_cont("SYND %llx ", m->synd);
+ if (m->ipid)
+ pr_cont("IPID %llx ", m->ipid);
+ }
+
pr_cont("\n");
/*
* Note this output is parsed by external tools and old fields
@@ -568,6 +576,7 @@
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));
@@ -579,6 +588,23 @@
m->addr >>= shift;
m->addr <<= shift;
}
+
+ /*
+ * Extract [55:<lsb>] where lsb is the least significant
+ * *valid* bit of the address bits.
+ */
+ if (mce_flags.smca) {
+ u8 lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+ }
+ }
+
+ if (mce_flags.smca) {
+ m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+
+ if (m->status & MCI_STATUS_SYNDV)
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
}
}
@@ -1633,17 +1659,6 @@
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
- /*
- * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
- * whether this processor will actually generate recoverable
- * machine checks. Check to see if this is an E7 model Xeon.
- * We can't do a model number check because E5 and E7 use the
- * same model number. E5 doesn't support recovery, E7 does.
- */
- if (mca_cfg.recovery || (mca_cfg.ser &&
- !strncmp(c->x86_model_id,
- "Intel(R) Xeon(R) CPU E7-", 24)))
- set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -2080,6 +2095,7 @@
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
+ * mce=recovery force enable memcpy_mcsafe()
*/
static int __init mcheck_enable(char *str)
{
@@ -2676,8 +2692,14 @@
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif
+DEFINE_STATIC_KEY_FALSE(mcsafe_key);
+EXPORT_SYMBOL_GPL(mcsafe_key);
+
static int __init mcheck_late_init(void)
{
+ if (mca_cfg.recovery)
+ static_branch_inc(&mcsafe_key);
+
mcheck_debugfs_init();
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7b7f3be..9b54034 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
+#include <linux/string.h>
#include <asm/amd_nb.h>
#include <asm/apic.h>
@@ -63,34 +64,71 @@
"execution_unit",
};
-/* Define HWID to IP type mappings for Scalable MCA */
-struct amd_hwid amd_hwids[] = {
- [SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
- [SMCA_DF] = { "data_fabric", 0x2E },
- [SMCA_UMC] = { "umc", 0x96 },
- [SMCA_PB] = { "param_block", 0x5 },
- [SMCA_PSP] = { "psp", 0xFF },
- [SMCA_SMU] = { "smu", 0x1 },
+static const char * const smca_umc_block_names[] = {
+ "dram_ecc",
+ "misc_umc"
};
-EXPORT_SYMBOL_GPL(amd_hwids);
-const char * const amd_core_mcablock_names[] = {
- [SMCA_LS] = "load_store",
- [SMCA_IF] = "insn_fetch",
- [SMCA_L2_CACHE] = "l2_cache",
- [SMCA_DE] = "decode_unit",
- [RES] = "",
- [SMCA_EX] = "execution_unit",
- [SMCA_FP] = "floating_point",
- [SMCA_L3_CACHE] = "l3_cache",
+struct smca_bank_name smca_bank_names[] = {
+ [SMCA_LS] = { "load_store", "Load Store Unit" },
+ [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
+ [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
+ [SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_EX] = { "execution_unit", "Execution Unit" },
+ [SMCA_FP] = { "floating_point", "Floating Point Unit" },
+ [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
+ [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
+ [SMCA_UMC] = { "umc", "Unified Memory Controller" },
+ [SMCA_PB] = { "param_block", "Parameter Block" },
+ [SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_SMU] = { "smu", "System Management Unit" },
};
-EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+EXPORT_SYMBOL_GPL(smca_bank_names);
-const char * const amd_df_mcablock_names[] = {
- [SMCA_CS] = "coherent_slave",
- [SMCA_PIE] = "pie",
+static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+ /* { bank_type, hwid_mcatype, xec_bitmap } */
+
+ /* ZN Core (HWID=0xB0) MCA types */
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
+ { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+ /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
+ { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+
+ /* Data Fabric MCA types */
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+
+ /* Unified Memory Controller MCA type */
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+
+ /* Parameter Block MCA type */
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
+
+ /* Platform Security Processor MCA type */
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
+
+ /* System Management Unit MCA type */
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
};
-EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
+struct smca_bank_info smca_banks[MAX_NR_BANKS];
+EXPORT_SYMBOL_GPL(smca_banks);
+
+/*
+ * In SMCA enabled processors, we can have multiple banks for a given IP type.
+ * So to define a unique name for each bank, we use a temp c-string to append
+ * the MCA_IPID[InstanceId] to type's name in get_name().
+ *
+ * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
+ * is greater than 8 plus 1 (for underscore) plus length of longest type name.
+ */
+#define MAX_MCATYPE_NAME_LEN 30
+static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
@@ -108,6 +146,36 @@
* CPU Initialization
*/
+static void get_smca_bank_info(unsigned int bank)
+{
+ unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+ struct smca_hwid_mcatype *type;
+ u32 high, instanceId;
+ u16 hwid, mcatype;
+
+ /* Collect bank_info using CPU 0 for now. */
+ if (cpu)
+ return;
+
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+ pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+ return;
+ }
+
+ hwid = high & MCI_IPID_HWID;
+ mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+ hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+ type = &smca_hwid_mcatypes[i];
+ if (hwid_mcatype == type->hwid_mcatype) {
+ smca_banks[bank].type = type;
+ smca_banks[bank].type_instance = instanceId;
+ break;
+ }
+ }
+}
+
struct thresh_restart {
struct threshold_block *b;
int reset;
@@ -293,7 +361,7 @@
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -309,13 +377,13 @@
*/
u32 low, high;
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
return addr;
if (!(low & MCI_CONFIG_MCAX))
return addr;
- if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
@@ -395,6 +463,20 @@
*/
smca_high &= ~BIT(2);
+ /*
+ * SMCA sets the Deferred Error Interrupt type per bank.
+ *
+ * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
+ * if the DeferredIntType bit field is available.
+ *
+ * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
+ * high portion of the MSR). OS should set this to 0x1 to enable
+ * APIC based interrupt. First, check that no interrupt has been
+ * set.
+ */
+ if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
+ smca_high |= BIT(5);
+
wrmsr(smca_addr, smca_low, smca_high);
}
@@ -421,12 +503,15 @@
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
u32 low = 0, high = 0, address = 0;
- unsigned int bank, block;
+ unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ if (mce_flags.smca)
+ get_smca_bank_info(bank);
+
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(cpu, address, low, high, bank, block);
if (!address)
break;
@@ -476,9 +561,27 @@
if (threshold_err)
m.misc = misc;
- if (m.status & MCI_STATUS_ADDRV)
+ if (m.status & MCI_STATUS_ADDRV) {
rdmsrl(msr_addr, m.addr);
+ /*
+ * Extract [55:<lsb>] where lsb is the least significant
+ * *valid* bit of the address bits.
+ */
+ if (mce_flags.smca) {
+ u8 lsb = (m.addr >> 56) & 0x3f;
+
+ m.addr &= GENMASK_ULL(55, lsb);
+ }
+ }
+
+ if (mce_flags.smca) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+
+ if (m.status & MCI_STATUS_SYNDV)
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ }
+
mce_log(&m);
wrmsrl(msr_status, 0);
@@ -541,15 +644,14 @@
static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
- int cpu = smp_processor_id();
- unsigned int bank, block;
+ unsigned int bank, block, cpu = smp_processor_id();
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(cpu, address, low, high, bank, block);
if (!address)
break;
@@ -713,6 +815,34 @@
.default_attrs = default_attrs,
};
+static const char *get_name(unsigned int bank, struct threshold_block *b)
+{
+ unsigned int bank_type;
+
+ if (!mce_flags.smca) {
+ if (b && bank == 4)
+ return bank4_names(b);
+
+ return th_names[bank];
+ }
+
+ if (!smca_banks[bank].type)
+ return NULL;
+
+ bank_type = smca_banks[bank].type->bank_type;
+
+ if (b && bank_type == SMCA_UMC) {
+ if (b->block < ARRAY_SIZE(smca_umc_block_names))
+ return smca_umc_block_names[b->block];
+ return NULL;
+ }
+
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
+ "%s_%x", smca_bank_names[bank_type].name,
+ smca_banks[bank].type_instance);
+ return buf_mcatype;
+}
+
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
unsigned int block, u32 address)
{
@@ -767,11 +897,11 @@
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
per_cpu(threshold_banks, cpu)[bank]->kobj,
- (bank == 4 ? bank4_names(b) : th_names[bank]));
+ get_name(bank, b));
if (err)
goto out_free;
recurse:
- address = get_block_address(address, low, high, bank, ++block);
+ address = get_block_address(cpu, address, low, high, bank, ++block);
if (!address)
return 0;
@@ -822,7 +952,7 @@
struct device *dev = per_cpu(mce_device, cpu);
struct amd_northbridge *nb = NULL;
struct threshold_bank *b = NULL;
- const char *name = th_names[bank];
+ const char *name = get_name(bank, NULL);
int err = 0;
if (is_shared_bank(bank)) {
@@ -869,7 +999,7 @@
}
}
- err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
+ err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
if (!err)
goto out;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index df04b2d..5ce5155 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -558,55 +558,36 @@
.resume = mc_bp_resume,
};
-static int
-mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+static int mc_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
struct device *dev;
dev = get_cpu_device(cpu);
+ microcode_update_cpu(cpu);
+ pr_debug("CPU%d added\n", cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- microcode_update_cpu(cpu);
- pr_debug("CPU%d added\n", cpu);
- /*
- * "break" is missing on purpose here because we want to fall
- * through in order to create the sysfs group.
- */
+ if (sysfs_create_group(&dev->kobj, &mc_attr_group))
+ pr_err("Failed to create group for CPU%d\n", cpu);
+ return 0;
+}
- case CPU_DOWN_FAILED:
- if (sysfs_create_group(&dev->kobj, &mc_attr_group))
- pr_err("Failed to create group for CPU%d\n", cpu);
- break;
+static int mc_cpu_down_prep(unsigned int cpu)
+{
+ struct device *dev;
- case CPU_DOWN_PREPARE:
- /* Suspend is in progress, only remove the interface */
- sysfs_remove_group(&dev->kobj, &mc_attr_group);
- pr_debug("CPU%d removed\n", cpu);
- break;
-
+ dev = get_cpu_device(cpu);
+ /* Suspend is in progress, only remove the interface */
+ sysfs_remove_group(&dev->kobj, &mc_attr_group);
+ pr_debug("CPU%d removed\n", cpu);
/*
- * case CPU_DEAD:
- *
* When a CPU goes offline, don't free up or invalidate the copy of
* the microcode in kernel memory, so that we can reuse it when the
* CPU comes back online without unnecessarily requesting the userspace
* for it again.
*/
- }
-
- /* The CPU refused to come up during a system resume */
- if (action == CPU_UP_CANCELED_FROZEN)
- microcode_fini_cpu(cpu);
-
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block mc_cpu_notifier = {
- .notifier_call = mc_cpu_callback,
-};
-
static struct attribute *cpu_root_microcode_attrs[] = {
&dev_attr_reload.attr,
NULL
@@ -665,7 +646,8 @@
goto out_ucode_group;
register_syscore_ops(&mc_syscore_ops);
- register_hotcpu_notifier(&mc_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
+ mc_cpu_online, mc_cpu_down_prep);
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54..24e87e7 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@
u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
const struct mtrr_ops *mtrr_if;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
-void set_mtrr_ops(const struct mtrr_ops *ops)
+void __init set_mtrr_ops(const struct mtrr_ops *ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced0..ad8bd76 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@
bool get_mtrr_state(void);
void mtrr_bp_pat_init(void);
-extern void set_mtrr_ops(const struct mtrr_ops *ops);
+extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
extern u64 size_or_mask, size_and_mask;
extern const struct mtrr_ops *mtrr_if;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a..9b7cf5c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
-
+#include <asm/unwind.h>
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
@@ -25,11 +25,29 @@
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
-static void printk_stack_address(unsigned long address, int reliable,
- void *data)
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info)
{
+ unsigned long *begin = task_stack_page(task);
+ unsigned long *end = task_stack_page(task) + THREAD_SIZE;
+
+ if (stack < begin || stack >= end)
+ return false;
+
+ info->type = STACK_TYPE_TASK;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = NULL;
+
+ return true;
+}
+
+static void printk_stack_address(unsigned long address, int reliable,
+ char *log_lvl)
+{
+ touch_nmi_watchdog();
printk("%s [<%p>] %s%pB\n",
- (char *)data, (void *)address, reliable ? "" : "? ",
+ log_lvl, (void *)address, reliable ? "" : "? ",
(void *)address);
}
@@ -38,176 +56,120 @@
pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void
-print_ftrace_graph_addr(unsigned long addr, void *data,
- const struct stacktrace_ops *ops,
- struct task_struct *task, int *graph)
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, char *log_lvl)
{
- unsigned long ret_addr;
- int index;
+ struct unwind_state state;
+ struct stack_info stack_info = {0};
+ unsigned long visit_mask = 0;
+ int graph_idx = 0;
- if (addr != (unsigned long)return_to_handler)
- return;
+ printk("%sCall Trace:\n", log_lvl);
- index = task->curr_ret_stack;
-
- if (!task->ret_stack || index < *graph)
- return;
-
- index -= *graph;
- ret_addr = task->ret_stack[index].ret;
-
- ops->address(data, ret_addr, 1);
-
- (*graph)++;
-}
-#else
-static inline void
-print_ftrace_graph_addr(unsigned long addr, void *data,
- const struct stacktrace_ops *ops,
- struct task_struct *task, int *graph)
-{ }
-#endif
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct task_struct *task,
- void *p, unsigned int size, void *end)
-{
- void *t = task_stack_page(task);
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p >= t && p < t + THREAD_SIZE - size;
-}
-
-unsigned long
-print_context_stack(struct task_struct *task,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end, int *graph)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
+ unwind_start(&state, task, regs, stack);
/*
- * If we overflowed the stack into a guard page, jump back to the
- * bottom of the usable stack.
+ * Iterate through the stacks, starting with the current stack pointer.
+ * Each stack has a pointer to the next one.
+ *
+ * x86-64 can have several stacks:
+ * - task stack
+ * - interrupt stack
+ * - HW exception stacks (double fault, nmi, debug, mce)
+ *
+ * x86-32 can have up to three stacks:
+ * - task stack
+ * - softirq stack
+ * - hardirq stack
*/
- if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
- PAGE_SIZE)
- stack = (unsigned long *)task_stack_page(task);
+ for (; stack; stack = stack_info.next_sp) {
+ const char *str_begin, *str_end;
- while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
- unsigned long addr;
+ /*
+ * If we overflowed the task stack into a guard page, jump back
+ * to the bottom of the usable stack.
+ */
+ if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+ stack = task_stack_page(task);
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, 0);
- }
- print_ftrace_graph_addr(addr, data, ops, task, graph);
+ if (get_stack_info(stack, task, &stack_info, &visit_mask))
+ break;
+
+ stack_type_str(stack_info.type, &str_begin, &str_end);
+ if (str_begin)
+ printk("%s <%s> ", log_lvl, str_begin);
+
+ /*
+ * Scan the stack, printing any text addresses we find. At the
+ * same time, follow proper stack frames with the unwinder.
+ *
+ * Addresses found during the scan which are not reported by
+ * the unwinder are considered to be additional clues which are
+ * sometimes useful for debugging and are prefixed with '?'.
+ * This also serves as a failsafe option in case the unwinder
+ * goes off in the weeds.
+ */
+ for (; stack < stack_info.end; stack++) {
+ unsigned long real_addr;
+ int reliable = 0;
+ unsigned long addr = *stack;
+ unsigned long *ret_addr_p =
+ unwind_get_return_address_ptr(&state);
+
+ if (!__kernel_text_address(addr))
+ continue;
+
+ if (stack == ret_addr_p)
+ reliable = 1;
+
+ /*
+ * When function graph tracing is enabled for a
+ * function, its return address on the stack is
+ * replaced with the address of an ftrace handler
+ * (return_to_handler). In that case, before printing
+ * the "real" address, we want to print the handler
+ * address as an "unreliable" hint that function graph
+ * tracing was involved.
+ */
+ real_addr = ftrace_graph_ret_addr(task, &graph_idx,
+ addr, stack);
+ if (real_addr != addr)
+ printk_stack_address(addr, 0, log_lvl);
+ printk_stack_address(real_addr, reliable, log_lvl);
+
+ if (!reliable)
+ continue;
+
+ /*
+ * Get the next frame from the unwinder. No need to
+ * check for an error: if anything goes wrong, the rest
+ * of the addresses will just be printed as unreliable.
+ */
+ unwind_next_frame(&state);
}
- stack++;
+
+ if (str_end)
+ printk("%s <%s> ", log_lvl, str_end);
}
- return bp;
-}
-EXPORT_SYMBOL_GPL(print_context_stack);
-
-unsigned long
-print_context_stack_bp(struct task_struct *task,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end, int *graph)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
- unsigned long *ret_addr = &frame->return_address;
-
- while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
- unsigned long addr = *ret_addr;
-
- if (!__kernel_text_address(addr))
- break;
-
- if (ops->address(data, addr, 1))
- break;
- frame = frame->next_frame;
- ret_addr = &frame->return_address;
- print_ftrace_graph_addr(addr, data, ops, task, graph);
- }
-
- return (unsigned long)frame;
-}
-EXPORT_SYMBOL_GPL(print_context_stack_bp);
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static int print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk_stack_address(addr, reliable, data);
- return 0;
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .stack = print_trace_stack,
- .address = print_trace_address,
- .walk_stack = print_context_stack,
-};
-
-void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
- unsigned long bp = 0;
- unsigned long stack;
+ task = task ? : current;
/*
* Stack frames below this one aren't interesting. Don't show them
* if we're printing for %current.
*/
- if (!sp && (!task || task == current)) {
- sp = &stack;
- bp = stack_frame(current, NULL);
- }
+ if (!sp && task == current)
+ sp = get_stack_pointer(current, NULL);
- show_stack_log_lvl(task, NULL, sp, bp, "");
+ show_stack_log_lvl(task, NULL, sp, "");
}
void show_stack_regs(struct pt_regs *regs)
{
- show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
+ show_stack_log_lvl(current, regs, NULL, "");
}
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0967571..06eb322 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
#include <asm/stacktrace.h>
-static void *is_irq_stack(void *p, void *irq)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
{
- if (p < irq || p >= (irq + THREAD_SIZE))
- return NULL;
- return irq + THREAD_SIZE;
+ switch (type) {
+ case STACK_TYPE_IRQ:
+ case STACK_TYPE_SOFTIRQ:
+ *begin = "IRQ";
+ *end = "EOI";
+ break;
+ default:
+ *begin = NULL;
+ *end = NULL;
+ }
}
-
-static void *is_hardirq_stack(unsigned long *stack, int cpu)
+static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
- void *irq = per_cpu(hardirq_stack, cpu);
+ unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+ unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
- return is_irq_stack(stack, irq);
+ /*
+ * This is a software stack, so 'end' can be a valid stack pointer.
+ * It just means the stack is empty.
+ */
+ if (stack < begin || stack > end)
+ return false;
+
+ info->type = STACK_TYPE_IRQ;
+ info->begin = begin;
+ info->end = end;
+
+ /*
+ * See irq_32.c -- the next stack pointer is stored at the beginning of
+ * the stack.
+ */
+ info->next_sp = (unsigned long *)*begin;
+
+ return true;
}
-static void *is_softirq_stack(unsigned long *stack, int cpu)
+static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
- void *irq = per_cpu(softirq_stack, cpu);
+ unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+ unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
- return is_irq_stack(stack, irq);
+ /*
+ * This is a software stack, so 'end' can be a valid stack pointer.
+ * It just means the stack is empty.
+ */
+ if (stack < begin || stack > end)
+ return false;
+
+ info->type = STACK_TYPE_SOFTIRQ;
+ info->begin = begin;
+ info->end = end;
+
+ /*
+ * The next stack pointer is stored at the beginning of the stack.
+ * See irq_32.c.
+ */
+ info->next_sp = (unsigned long *)*begin;
+
+ return true;
}
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
{
- const unsigned cpu = get_cpu();
- int graph = 0;
- u32 *prev_esp;
+ if (!stack)
+ goto unknown;
- if (!task)
- task = current;
+ task = task ? : current;
- if (!stack) {
- unsigned long dummy;
+ if (in_task_stack(stack, task, info))
+ goto recursion_check;
- stack = &dummy;
- if (task != current)
- stack = (unsigned long *)task->thread.sp;
+ if (task != current)
+ goto unknown;
+
+ if (in_hardirq_stack(stack, info))
+ goto recursion_check;
+
+ if (in_softirq_stack(stack, info))
+ goto recursion_check;
+
+ goto unknown;
+
+recursion_check:
+ /*
+ * Make sure we don't iterate through any given stack more than once.
+ * If it comes up a second time then there's something wrong going on:
+ * just break out and report an unknown stack type.
+ */
+ if (visit_mask) {
+ if (*visit_mask & (1UL << info->type))
+ goto unknown;
+ *visit_mask |= 1UL << info->type;
}
- if (!bp)
- bp = stack_frame(task, regs);
+ return 0;
- for (;;) {
- void *end_stack;
-
- end_stack = is_hardirq_stack(stack, cpu);
- if (!end_stack)
- end_stack = is_softirq_stack(stack, cpu);
-
- bp = ops->walk_stack(task, stack, bp, ops, data,
- end_stack, &graph);
-
- /* Stop if not on irq stack */
- if (!end_stack)
- break;
-
- /* The previous esp is saved on the bottom of the stack */
- prev_esp = (u32 *)(end_stack - THREAD_SIZE);
- stack = (unsigned long *)*prev_esp;
- if (!stack)
- break;
-
- if (ops->stack(data, "IRQ") < 0)
- break;
- touch_nmi_watchdog();
- }
- put_cpu();
+unknown:
+ info->type = STACK_TYPE_UNKNOWN;
+ return -EINVAL;
}
-EXPORT_SYMBOL(dump_trace);
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, char *log_lvl)
{
unsigned long *stack;
int i;
- if (sp == NULL) {
- if (regs)
- sp = (unsigned long *)regs->sp;
- else if (task)
- sp = (unsigned long *)task->thread.sp;
- else
- sp = (unsigned long *)&sp;
- }
+ if (!try_get_task_stack(task))
+ return;
+
+ sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@
touch_nmi_watchdog();
}
pr_cont("\n");
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ show_trace_log_lvl(task, regs, sp, log_lvl);
+
+ put_task_stack(task);
}
@@ -139,7 +169,7 @@
u8 *ip;
pr_emerg("Stack:\n");
- show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG);
+ show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
pr_emerg("Code:");
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520..36cf1a4 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
#include <asm/stacktrace.h>
-
-#define N_EXCEPTION_STACKS_END \
- (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
-
-static char x86_stack_ids[][8] = {
- [ DEBUG_STACK-1 ] = "#DB",
- [ NMI_STACK-1 ] = "NMI",
- [ DOUBLEFAULT_STACK-1 ] = "#DF",
- [ MCE_STACK-1 ] = "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [ N_EXCEPTION_STACKS ...
- N_EXCEPTION_STACKS_END ] = "#DB[?]"
-#endif
+static char *exception_stack_names[N_EXCEPTION_STACKS] = {
+ [ DOUBLEFAULT_STACK-1 ] = "#DF",
+ [ NMI_STACK-1 ] = "NMI",
+ [ DEBUG_STACK-1 ] = "#DB",
+ [ MCE_STACK-1 ] = "#MC",
};
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
+static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
+
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
{
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+
+ switch (type) {
+ case STACK_TYPE_IRQ:
+ *begin = "IRQ";
+ *end = "EOI";
+ break;
+ case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
+ *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
+ *end = "EOE";
+ break;
+ default:
+ *begin = NULL;
+ *end = NULL;
+ }
+}
+
+static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
+{
+ unsigned long *begin, *end;
+ struct pt_regs *regs;
unsigned k;
- /*
- * Iterate over all exception stacks, and figure out whether
- * 'stack' is in one of them:
- */
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- unsigned long end = per_cpu(orig_ist, cpu).ist[k];
- /*
- * Is 'stack' above this exception frame's end?
- * If yes then skip to the next frame.
- */
- if (stack >= end)
+ end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
+ begin = end - (exception_stack_sizes[k] / sizeof(long));
+ regs = (struct pt_regs *)end - 1;
+
+ if (stack < begin || stack >= end)
continue;
- /*
- * Is 'stack' above this exception frame's start address?
- * If yes then we found the right frame.
- */
- if (stack >= end - EXCEPTION_STKSZ) {
- /*
- * Make sure we only iterate through an exception
- * stack once. If it comes up for the second time
- * then there's something wrong going on - just
- * break out and return NULL:
- */
- if (*usedp & (1U << k))
- break;
- *usedp |= 1U << k;
- *idp = x86_stack_ids[k];
- return (unsigned long *)end;
- }
- /*
- * If this is a debug stack, and if it has a larger size than
- * the usual exception stacks, then 'stack' might still
- * be within the lower portion of the debug stack:
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
- unsigned j = N_EXCEPTION_STACKS - 1;
- /*
- * Black magic. A large debug stack is composed of
- * multiple exception stack entries, which we
- * iterate through now. Dont look:
- */
- do {
- ++j;
- end -= EXCEPTION_STKSZ;
- x86_stack_ids[j][4] = '1' +
- (j - N_EXCEPTION_STACKS);
- } while (stack < end - EXCEPTION_STKSZ);
- if (*usedp & (1U << j))
- break;
- *usedp |= 1U << j;
- *idp = x86_stack_ids[j];
- return (unsigned long *)end;
- }
-#endif
- }
- return NULL;
-}
+ info->type = STACK_TYPE_EXCEPTION + k;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = (unsigned long *)regs->sp;
-static inline int
-in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
- unsigned long *irq_stack_end)
-{
- return (stack >= irq_stack && stack < irq_stack_end);
-}
-
-static const unsigned long irq_stack_size =
- (IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
-
-enum stack_type {
- STACK_IS_UNKNOWN,
- STACK_IS_NORMAL,
- STACK_IS_EXCEPTION,
- STACK_IS_IRQ,
-};
-
-static enum stack_type
-analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
- unsigned long **stack_end, unsigned long *irq_stack,
- unsigned *used, char **id)
-{
- unsigned long addr;
-
- addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
- if ((unsigned long)task_stack_page(task) == addr)
- return STACK_IS_NORMAL;
-
- *stack_end = in_exception_stack(cpu, (unsigned long)stack,
- used, id);
- if (*stack_end)
- return STACK_IS_EXCEPTION;
-
- if (!irq_stack)
- return STACK_IS_NORMAL;
-
- *stack_end = irq_stack;
- irq_stack = irq_stack - irq_stack_size;
-
- if (in_irq_stack(stack, irq_stack, *stack_end))
- return STACK_IS_IRQ;
-
- return STACK_IS_UNKNOWN;
-}
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
-{
- const unsigned cpu = get_cpu();
- unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
- unsigned long dummy;
- unsigned used = 0;
- int graph = 0;
- int done = 0;
-
- if (!task)
- task = current;
-
- if (!stack) {
- if (regs)
- stack = (unsigned long *)regs->sp;
- else if (task != current)
- stack = (unsigned long *)task->thread.sp;
- else
- stack = &dummy;
+ return true;
}
- if (!bp)
- bp = stack_frame(task, regs);
- /*
- * Print function call entries in all stacks, starting at the
- * current stack address. If the stacks consist of nested
- * exceptions
- */
- while (!done) {
- unsigned long *stack_end;
- enum stack_type stype;
- char *id;
+ return false;
+}
- stype = analyze_stack(cpu, task, stack, &stack_end,
- irq_stack, &used, &id);
-
- /* Default finish unless specified to continue */
- done = 1;
-
- switch (stype) {
-
- /* Break out early if we are on the thread stack */
- case STACK_IS_NORMAL:
- break;
-
- case STACK_IS_EXCEPTION:
-
- if (ops->stack(data, id) < 0)
- break;
-
- bp = ops->walk_stack(task, stack, bp, ops,
- data, stack_end, &graph);
- ops->stack(data, "<EOE>");
- /*
- * We link to the next stack via the
- * second-to-last pointer (index -2 to end) in the
- * exception stack:
- */
- stack = (unsigned long *) stack_end[-2];
- done = 0;
- break;
-
- case STACK_IS_IRQ:
-
- if (ops->stack(data, "IRQ") < 0)
- break;
- bp = ops->walk_stack(task, stack, bp,
- ops, data, stack_end, &graph);
- /*
- * We link to the next stack (which would be
- * the process stack normally) the last
- * pointer (index -1 to end) in the IRQ stack:
- */
- stack = (unsigned long *) (stack_end[-1]);
- irq_stack = NULL;
- ops->stack(data, "EOI");
- done = 0;
- break;
-
- case STACK_IS_UNKNOWN:
- ops->stack(data, "UNK");
- break;
- }
- }
+static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
+{
+ unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+ unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
- * This handles the process stack:
+ * This is a software stack, so 'end' can be a valid stack pointer.
+ * It just means the stack is empty.
*/
- bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
- put_cpu();
-}
-EXPORT_SYMBOL(dump_trace);
+ if (stack < begin || stack > end)
+ return false;
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ info->type = STACK_TYPE_IRQ;
+ info->begin = begin;
+ info->end = end;
+
+ /*
+ * The next stack pointer is the first thing pushed by the entry code
+ * after switching to the irq stack.
+ */
+ info->next_sp = (unsigned long *)*(end - 1);
+
+ return true;
+}
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
+{
+ if (!stack)
+ goto unknown;
+
+ task = task ? : current;
+
+ if (in_task_stack(stack, task, info))
+ goto recursion_check;
+
+ if (task != current)
+ goto unknown;
+
+ if (in_exception_stack(stack, info))
+ goto recursion_check;
+
+ if (in_irq_stack(stack, info))
+ goto recursion_check;
+
+ goto unknown;
+
+recursion_check:
+ /*
+ * Make sure we don't iterate through any given stack more than once.
+ * If it comes up a second time then there's something wrong going on:
+ * just break out and report an unknown stack type.
+ */
+ if (visit_mask) {
+ if (*visit_mask & (1UL << info->type))
+ goto unknown;
+ *visit_mask |= 1UL << info->type;
+ }
+
+ return 0;
+
+unknown:
+ info->type = STACK_TYPE_UNKNOWN;
+ return -EINVAL;
+}
+
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
unsigned long *stack;
- int cpu;
int i;
- preempt_disable();
- cpu = smp_processor_id();
+ if (!try_get_task_stack(task))
+ return;
- irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
- irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+ irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+ irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
- /*
- * Debugging aid: "show_stack(NULL, NULL);" prints the
- * back trace for this cpu:
- */
- if (sp == NULL) {
- if (regs)
- sp = (unsigned long *)regs->sp;
- else if (task)
- sp = (unsigned long *)task->thread.sp;
- else
- sp = (unsigned long *)&sp;
- }
+ sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@
stack++;
touch_nmi_watchdog();
}
- preempt_enable();
pr_cont("\n");
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ show_trace_log_lvl(task, regs, sp, log_lvl);
+
+ put_task_stack(task);
}
void show_regs(struct pt_regs *regs)
{
int i;
- unsigned long sp;
- sp = regs->sp;
show_regs_print_info(KERN_DEFAULT);
__show_regs(regs, 1);
@@ -325,8 +208,7 @@
u8 *ip;
printk(KERN_DEFAULT "Stack:\n");
- show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
- 0, KERN_DEFAULT);
+ show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
printk(KERN_DEFAULT "Code: ");
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 621b501..b85fe5f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -40,8 +40,10 @@
* user can e.g. boot the original kernel with mem=1G while still booting the
* next kernel with full memory.
*/
-struct e820map e820;
-struct e820map e820_saved;
+static struct e820map initial_e820 __initdata;
+static struct e820map initial_e820_saved __initdata;
+struct e820map *e820 __refdata = &initial_e820;
+struct e820map *e820_saved __refdata = &initial_e820_saved;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -58,8 +60,8 @@
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
if (type && ei->type != type)
continue;
@@ -81,8 +83,8 @@
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
if (type && ei->type != type)
continue;
@@ -128,7 +130,7 @@
void __init e820_add_region(u64 start, u64 size, int type)
{
- __e820_add_region(&e820, start, size, type);
+ __e820_add_region(e820, start, size, type);
}
static void __init e820_print_type(u32 type)
@@ -164,12 +166,12 @@
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < e820->nr_map; i++) {
printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
- (unsigned long long) e820.map[i].addr,
+ (unsigned long long) e820->map[i].addr,
(unsigned long long)
- (e820.map[i].addr + e820.map[i].size - 1));
- e820_print_type(e820.map[i].type);
+ (e820->map[i].addr + e820->map[i].size - 1));
+ e820_print_type(e820->map[i].type);
printk(KERN_CONT "\n");
}
}
@@ -348,7 +350,7 @@
* continue building up new bios map based on this
* information
*/
- if (current_type != last_type || current_type == E820_PRAM) {
+ if (current_type != last_type) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
@@ -388,11 +390,11 @@
while (nr_map) {
u64 start = biosmap->addr;
u64 size = biosmap->size;
- u64 end = start + size;
+ u64 end = start + size - 1;
u32 type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
+ if (start > end && likely(size))
return -1;
e820_add_region(start, size, type);
@@ -493,13 +495,13 @@
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type)
{
- return __e820_update_range(&e820, start, size, old_type, new_type);
+ return __e820_update_range(e820, start, size, old_type, new_type);
}
static u64 __init e820_update_range_saved(u64 start, u64 size,
unsigned old_type, unsigned new_type)
{
- return __e820_update_range(&e820_saved, start, size, old_type,
+ return __e820_update_range(e820_saved, start, size, old_type,
new_type);
}
@@ -521,8 +523,8 @@
e820_print_type(old_type);
printk(KERN_CONT "\n");
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
u64 final_start, final_end;
u64 ei_end;
@@ -566,15 +568,15 @@
void __init update_e820(void)
{
- if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map))
+ if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map))
return;
printk(KERN_INFO "e820: modified physical RAM map:\n");
e820_print_map("modified");
}
static void __init update_e820_saved(void)
{
- sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map),
- &e820_saved.nr_map);
+ sanitize_e820_map(e820_saved->map, ARRAY_SIZE(e820_saved->map),
+ &e820_saved->nr_map);
}
#define MAX_GAP_END 0x100000000ull
/*
@@ -584,14 +586,14 @@
unsigned long start_addr, unsigned long long end_addr)
{
unsigned long long last;
- int i = e820.nr_map;
+ int i = e820->nr_map;
int found = 0;
last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
+ unsigned long long start = e820->map[i].addr;
+ unsigned long long end = start + e820->map[i].size;
if (end < start_addr)
continue;
@@ -649,6 +651,33 @@
gapstart, gapstart + gapsize - 1);
}
+/*
+ * Called late during init, in free_initmem().
+ *
+ * Initial e820 and e820_saved are largish __initdata arrays.
+ * Copy them to (usually much smaller) dynamically allocated area.
+ * This is done after all tweaks we ever do to them:
+ * all functions which modify them are __init functions,
+ * they won't exist after this point.
+ */
+__init void e820_reallocate_tables(void)
+{
+ struct e820map *n;
+ int size;
+
+ size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820->nr_map;
+ n = kmalloc(size, GFP_KERNEL);
+ BUG_ON(!n);
+ memcpy(n, e820, size);
+ e820 = n;
+
+ size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820_saved->nr_map;
+ n = kmalloc(size, GFP_KERNEL);
+ BUG_ON(!n);
+ memcpy(n, e820_saved, size);
+ e820_saved = n;
+}
+
/**
* Because of the size limitation of struct boot_params, only first
* 128 E820 memory entries are passed to kernel via
@@ -665,7 +694,7 @@
entries = sdata->len / sizeof(struct e820entry);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
early_memunmap(sdata, data_len);
printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
@@ -686,8 +715,8 @@
int i;
unsigned long pfn = 0;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
if (pfn < PFN_UP(ei->addr))
register_nosave_region(pfn, PFN_UP(ei->addr));
@@ -712,8 +741,8 @@
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
if (ei->type == E820_NVS)
acpi_nvs_register(ei->addr, ei->size);
@@ -754,22 +783,18 @@
/*
* Find the highest page frame number we have available
*/
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
{
int i;
unsigned long last_pfn = 0;
unsigned long max_arch_pfn = MAX_ARCH_PFN;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
unsigned long start_pfn;
unsigned long end_pfn;
- /*
- * Persistent memory is accounted as ram for purposes of
- * establishing max_pfn and mem_map.
- */
- if (ei->type != E820_RAM && ei->type != E820_PRAM)
+ if (ei->type != type)
continue;
start_pfn = ei->addr >> PAGE_SHIFT;
@@ -794,15 +819,15 @@
}
unsigned long __init e820_end_of_ram_pfn(void)
{
- return e820_end_pfn(MAX_ARCH_PFN);
+ return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
}
unsigned long __init e820_end_of_low_ram_pfn(void)
{
- return e820_end_pfn(1UL << (32-PAGE_SHIFT));
+ return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM);
}
-static void early_panic(char *msg)
+static void __init early_panic(char *msg)
{
early_printk(msg);
panic(msg);
@@ -856,7 +881,7 @@
*/
saved_max_pfn = e820_end_of_ram_pfn();
#endif
- e820.nr_map = 0;
+ e820->nr_map = 0;
userdef = 1;
return 0;
}
@@ -903,8 +928,8 @@
void __init finish_e820_parsing(void)
{
if (userdef) {
- if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map),
- &e820.nr_map) < 0)
+ if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map),
+ &e820->nr_map) < 0)
early_panic("Invalid user supplied memory map");
printk(KERN_INFO "e820: user-defined physical RAM map:\n");
@@ -912,7 +937,7 @@
}
}
-static const char *e820_type_to_string(int e820_type)
+static const char *__init e820_type_to_string(int e820_type)
{
switch (e820_type) {
case E820_RESERVED_KERN:
@@ -926,7 +951,7 @@
}
}
-static unsigned long e820_type_to_iomem_type(int e820_type)
+static unsigned long __init e820_type_to_iomem_type(int e820_type)
{
switch (e820_type) {
case E820_RESERVED_KERN:
@@ -942,7 +967,7 @@
}
}
-static unsigned long e820_type_to_iores_desc(int e820_type)
+static unsigned long __init e820_type_to_iores_desc(int e820_type)
{
switch (e820_type) {
case E820_ACPI:
@@ -961,7 +986,7 @@
}
}
-static bool do_mark_busy(u32 type, struct resource *res)
+static bool __init do_mark_busy(u32 type, struct resource *res)
{
/* this is the legacy bios/dos rom-shadow + mmio region */
if (res->start < (1ULL<<20))
@@ -991,35 +1016,35 @@
struct resource *res;
u64 end;
- res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
+ res = alloc_bootmem(sizeof(struct resource) * e820->nr_map);
e820_res = res;
- for (i = 0; i < e820.nr_map; i++) {
- end = e820.map[i].addr + e820.map[i].size - 1;
+ for (i = 0; i < e820->nr_map; i++) {
+ end = e820->map[i].addr + e820->map[i].size - 1;
if (end != (resource_size_t)end) {
res++;
continue;
}
- res->name = e820_type_to_string(e820.map[i].type);
- res->start = e820.map[i].addr;
+ res->name = e820_type_to_string(e820->map[i].type);
+ res->start = e820->map[i].addr;
res->end = end;
- res->flags = e820_type_to_iomem_type(e820.map[i].type);
- res->desc = e820_type_to_iores_desc(e820.map[i].type);
+ res->flags = e820_type_to_iomem_type(e820->map[i].type);
+ res->desc = e820_type_to_iores_desc(e820->map[i].type);
/*
* don't register the region that could be conflicted with
* pci device BAR resource and insert them later in
* pcibios_resource_survey()
*/
- if (do_mark_busy(e820.map[i].type, res)) {
+ if (do_mark_busy(e820->map[i].type, res)) {
res->flags |= IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
}
res++;
}
- for (i = 0; i < e820_saved.nr_map; i++) {
- struct e820entry *entry = &e820_saved.map[i];
+ for (i = 0; i < e820_saved->nr_map; i++) {
+ struct e820entry *entry = &e820_saved->map[i];
firmware_map_add_early(entry->addr,
entry->addr + entry->size,
e820_type_to_string(entry->type));
@@ -1027,7 +1052,7 @@
}
/* How much should we pad RAM ending depending on where it is? */
-static unsigned long ram_alignment(resource_size_t pos)
+static unsigned long __init ram_alignment(resource_size_t pos)
{
unsigned long mb = pos >> 20;
@@ -1051,7 +1076,7 @@
struct resource *res;
res = e820_res;
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < e820->nr_map; i++) {
if (!res->parent && res->end)
insert_resource_expand_to_fit(&iomem_resource, res);
res++;
@@ -1061,8 +1086,8 @@
* Try to bump up RAM regions to reasonable boundaries to
* avoid stolen RAM:
*/
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *entry = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *entry = &e820->map[i];
u64 start, end;
if (entry->type != E820_RAM)
@@ -1110,7 +1135,7 @@
who = "BIOS-e801";
}
- e820.nr_map = 0;
+ e820->nr_map = 0;
e820_add_region(0, LOWMEMSIZE(), E820_RAM);
e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
}
@@ -1124,7 +1149,7 @@
char *who;
who = x86_init.resources.memory_setup();
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
+ memcpy(e820_saved, e820, sizeof(struct e820map));
printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
e820_print_map(who);
}
@@ -1141,8 +1166,8 @@
*/
memblock_allow_resize();
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ struct e820entry *ei = &e820->map[i];
end = ei->addr + ei->size;
if (end != (resource_size_t)end)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index de7501e..18bb3a6 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -555,7 +555,7 @@
/* Mark this space as reserved */
e820_add_region(base, size, E820_RESERVED);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
}
static void __init intel_graphics_quirks(int num, int slot, int func)
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982ae..2f2b8c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@
on_boot_cpu = 0;
WARN_ON_FPU(current->thread.fpu.fpstate_active);
- current_thread_info()->status = 0;
if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb..8639bb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@
}
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer) == -EBUSY) {
+ frame_pointer, parent) == -EBUSY) {
*parent = old;
return;
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b..5f40126 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@
*/
__HEAD
ENTRY(startup_32)
- movl pa(stack_start),%ecx
+ movl pa(initial_stack),%ecx
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
@@ -286,7 +286,7 @@
* start_secondary().
*/
ENTRY(start_cpu0)
- movl stack_start, %ecx
+ movl initial_stack, %ecx
movl %ecx, %esp
jmp *(initial_code)
ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
- movl pa(stack_start),%ecx
+ movl pa(initial_stack),%ecx
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
@@ -703,7 +703,7 @@
.data
.balign 4
-ENTRY(stack_start)
+ENTRY(initial_stack)
.long init_thread_union+THREAD_SIZE
__INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9..c98a559 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@
*/
/*
- * Setup stack for verify_cpu(). "-8" because stack_start is defined
+ * Setup stack for verify_cpu(). "-8" because initial_stack is defined
* this way, see below. Our best guess is a NULL ptr for stack
* termination heuristics and we don't want to break anything which
* might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@
movq %rax, %cr0
/* Setup a boot time stack */
- movq stack_start(%rip), %rsp
+ movq initial_stack(%rip), %rsp
/* zero EFLAGS after setting rsp */
pushq $0
@@ -310,7 +310,7 @@
* start_secondary().
*/
ENTRY(start_cpu0)
- movq stack_start(%rip),%rsp
+ movq initial_stack(%rip),%rsp
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
@@ -319,17 +319,15 @@
ENDPROC(start_cpu0)
#endif
- /* SMP bootup changes these two */
+ /* Both SMP bootup and ACPI suspend change these variables */
__REFDATA
.balign 8
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
-
- GLOBAL(stack_start)
+ GLOBAL(initial_stack)
.quad init_thread_union+THREAD_SIZE-8
- .word 0
__FINITDATA
bad_address:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c6dfd80..274fab9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -756,10 +756,104 @@
/*
* Clock source related code
*/
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
+/*
+ * Reading the HPET counter is a very slow operation. If a large number of
+ * CPUs are trying to access the HPET counter simultaneously, it can cause
+ * massive delay and slow down system performance dramatically. This may
+ * happen when HPET is the default clock source instead of TSC. For a
+ * really large system with hundreds of CPUs, the slowdown may be so
+ * severe that it may actually crash the system because of a NMI watchdog
+ * soft lockup, for example.
+ *
+ * If multiple CPUs are trying to access the HPET counter at the same time,
+ * we don't actually need to read the counter multiple times. Instead, the
+ * other CPUs can use the counter value read by the first CPU in the group.
+ *
+ * This special feature is only enabled on x86-64 systems. It is unlikely
+ * that 32-bit x86 systems will have enough CPUs to require this feature
+ * with its associated locking overhead. And we also need 64-bit atomic
+ * read.
+ *
+ * The lock and the hpet value are stored together and can be read in a
+ * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
+ * is 32 bits in size.
+ */
+union hpet_lock {
+ struct {
+ arch_spinlock_t lock;
+ u32 value;
+ };
+ u64 lockval;
+};
+
+static union hpet_lock hpet __cacheline_aligned = {
+ { .lock = __ARCH_SPIN_LOCK_UNLOCKED, },
+};
+
+static cycle_t read_hpet(struct clocksource *cs)
+{
+ unsigned long flags;
+ union hpet_lock old, new;
+
+ BUILD_BUG_ON(sizeof(union hpet_lock) != 8);
+
+ /*
+ * Read HPET directly if in NMI.
+ */
+ if (in_nmi())
+ return (cycle_t)hpet_readl(HPET_COUNTER);
+
+ /*
+ * Read the current state of the lock and HPET value atomically.
+ */
+ old.lockval = READ_ONCE(hpet.lockval);
+
+ if (arch_spin_is_locked(&old.lock))
+ goto contended;
+
+ local_irq_save(flags);
+ if (arch_spin_trylock(&hpet.lock)) {
+ new.value = hpet_readl(HPET_COUNTER);
+ /*
+ * Use WRITE_ONCE() to prevent store tearing.
+ */
+ WRITE_ONCE(hpet.value, new.value);
+ arch_spin_unlock(&hpet.lock);
+ local_irq_restore(flags);
+ return (cycle_t)new.value;
+ }
+ local_irq_restore(flags);
+
+contended:
+ /*
+ * Contended case
+ * --------------
+ * Wait until the HPET value change or the lock is free to indicate
+ * its value is up-to-date.
+ *
+ * It is possible that old.value has already contained the latest
+ * HPET value while the lock holder was in the process of releasing
+ * the lock. Checking for lock state change will enable us to return
+ * the value immediately instead of waiting for the next HPET reader
+ * to come along.
+ */
+ do {
+ cpu_relax();
+ new.lockval = READ_ONCE(hpet.lockval);
+ } while ((new.value == old.value) && arch_spin_is_locked(&new.lock));
+
+ return (cycle_t)new.value;
+}
+#else
+/*
+ * For UP or 32-bit.
+ */
static cycle_t read_hpet(struct clocksource *cs)
{
return (cycle_t)hpet_readl(HPET_COUNTER);
}
+#endif
static struct clocksource clocksource_hpet = {
.name = "hpet",
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a79037..9ebd0b0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -40,8 +40,7 @@
if (user_mode(regs))
return;
- if (regs->sp >= curbase + sizeof(struct thread_info) +
- sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+ if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index f2356bd..3407b14 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -99,14 +99,14 @@
{
unsigned int nr_e820_entries;
- nr_e820_entries = e820_saved.nr_map;
+ nr_e820_entries = e820_saved->nr_map;
/* TODO: Pass entries more than E820MAX in bootparams setup data */
if (nr_e820_entries > E820MAX)
nr_e820_entries = E820MAX;
params->e820_entries = nr_e820_entries;
- memcpy(¶ms->e820_map, &e820_saved.map,
+ memcpy(¶ms->e820_map, &e820_saved->map,
nr_e820_entries * sizeof(struct e820entry));
return 0;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde52..8e36f24 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/nmi.h>
+#include <asm/switch_to.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
@@ -166,21 +167,19 @@
gdb_regs[GDB_DX] = 0;
gdb_regs[GDB_SI] = 0;
gdb_regs[GDB_DI] = 0;
- gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp;
+ gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp;
#ifdef CONFIG_X86_32
gdb_regs[GDB_DS] = __KERNEL_DS;
gdb_regs[GDB_ES] = __KERNEL_DS;
gdb_regs[GDB_PS] = 0;
gdb_regs[GDB_CS] = __KERNEL_CS;
- gdb_regs[GDB_PC] = p->thread.ip;
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
#else
- gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
+ gdb_regs32[GDB_PS] = 0;
gdb_regs32[GDB_CS] = __KERNEL_CS;
gdb_regs32[GDB_SS] = __KERNEL_DS;
- gdb_regs[GDB_PC] = 0;
gdb_regs[GDB_R8] = 0;
gdb_regs[GDB_R9] = 0;
gdb_regs[GDB_R10] = 0;
@@ -190,6 +189,7 @@
gdb_regs[GDB_R14] = 0;
gdb_regs[GDB_R15] = 0;
#endif
+ gdb_regs[GDB_PC] = 0;
gdb_regs[GDB_SP] = p->thread.sp;
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7847e5c..28cee01 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -45,7 +45,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4425f59..3bb4c5f 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -24,7 +24,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index c2bedae..4afc67f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -184,7 +184,7 @@
static struct kobj_attribute type_attr = __ATTR_RO(type);
-static struct bin_attribute data_attr = {
+static struct bin_attribute data_attr __ro_after_init = {
.attr = {
.name = "data",
.mode = S_IRUGO,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1726c4c..edbbfc8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -423,12 +423,7 @@
kvm_spinlock_init();
}
-static void kvm_guest_cpu_online(void *dummy)
-{
- kvm_guest_cpu_init();
-}
-
-static void kvm_guest_cpu_offline(void *dummy)
+static void kvm_guest_cpu_offline(void)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -437,29 +432,21 @@
apf_task_wake_all();
}
-static int kvm_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int kvm_cpu_online(unsigned int cpu)
{
- int cpu = (unsigned long)hcpu;
- switch (action) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- case CPU_ONLINE_FROZEN:
- smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
- break;
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ local_irq_disable();
+ kvm_guest_cpu_init();
+ local_irq_enable();
+ return 0;
}
-static struct notifier_block kvm_cpu_notifier = {
- .notifier_call = kvm_cpu_notify,
-};
+static int kvm_cpu_down_prepare(unsigned int cpu)
+{
+ local_irq_disable();
+ kvm_guest_cpu_offline();
+ local_irq_enable();
+ return 0;
+}
#endif
static void __init kvm_apf_trap_init(void)
@@ -494,7 +481,9 @@
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
- register_cpu_notifier(&kvm_cpu_notifier);
+ if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
+ kvm_cpu_online, kvm_cpu_down_prepare) < 0)
+ pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
#else
kvm_guest_cpu_init();
#endif
@@ -575,9 +564,6 @@
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
-
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
#include <asm/qspinlock.h>
static void kvm_wait(u8 *ptr, u8 val)
@@ -606,243 +592,6 @@
local_irq_restore(flags);
}
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-enum kvm_contention_stat {
- TAKEN_SLOW,
- TAKEN_SLOW_PICKUP,
- RELEASED_SLOW,
- RELEASED_SLOW_KICKED,
- NR_CONTENTION_STATS
-};
-
-#ifdef CONFIG_KVM_DEBUG_FS
-#define HISTO_BUCKETS 30
-
-static struct kvm_spinlock_stats
-{
- u32 contention_stats[NR_CONTENTION_STATS];
- u32 histo_spin_blocked[HISTO_BUCKETS+1];
- u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
- u8 ret;
- u8 old;
-
- old = READ_ONCE(zero_stats);
- if (unlikely(old)) {
- ret = cmpxchg(&zero_stats, old, 0);
- /* This ensures only one fellow resets the stat */
- if (ret == old)
- memset(&spinlock_stats, 0, sizeof(spinlock_stats));
- }
-}
-
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
- check_zero();
- spinlock_stats.contention_stats[var] += val;
-}
-
-
-static inline u64 spin_time_start(void)
-{
- return sched_clock();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
- unsigned index;
-
- index = ilog2(delta);
- check_zero();
-
- if (index < HISTO_BUCKETS)
- array[index]++;
- else
- array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
- u32 delta;
-
- delta = sched_clock() - start;
- __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
- spinlock_stats.time_blocked += delta;
-}
-
-static struct dentry *d_spin_debug;
-static struct dentry *d_kvm_debug;
-
-static struct dentry *kvm_init_debugfs(void)
-{
- d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
- if (!d_kvm_debug)
- printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
-
- return d_kvm_debug;
-}
-
-static int __init kvm_spinlock_debugfs(void)
-{
- struct dentry *d_kvm;
-
- d_kvm = kvm_init_debugfs();
- if (d_kvm == NULL)
- return -ENOMEM;
-
- d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
-
- debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
- debugfs_create_u32("taken_slow", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[TAKEN_SLOW]);
- debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
-
- debugfs_create_u32("released_slow", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[RELEASED_SLOW]);
- debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
- debugfs_create_u64("time_blocked", 0444, d_spin_debug,
- &spinlock_stats.time_blocked);
-
- debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
- spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
- return 0;
-}
-fs_initcall(kvm_spinlock_debugfs);
-#else /* !CONFIG_KVM_DEBUG_FS */
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
- return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif /* CONFIG_KVM_DEBUG_FS */
-
-struct kvm_lock_waiting {
- struct arch_spinlock *lock;
- __ticket_t want;
-};
-
-/* cpus 'waiting' on a spinlock to become available */
-static cpumask_t waiting_cpus;
-
-/* Track spinlock on which a cpu is waiting */
-static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
-
-__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
- struct kvm_lock_waiting *w;
- int cpu;
- u64 start;
- unsigned long flags;
- __ticket_t head;
-
- if (in_nmi())
- return;
-
- w = this_cpu_ptr(&klock_waiting);
- cpu = smp_processor_id();
- start = spin_time_start();
-
- /*
- * Make sure an interrupt handler can't upset things in a
- * partially setup state.
- */
- local_irq_save(flags);
-
- /*
- * The ordering protocol on this is that the "lock" pointer
- * may only be set non-NULL if the "want" ticket is correct.
- * If we're updating "want", we must first clear "lock".
- */
- w->lock = NULL;
- smp_wmb();
- w->want = want;
- smp_wmb();
- w->lock = lock;
-
- add_stats(TAKEN_SLOW, 1);
-
- /*
- * This uses set_bit, which is atomic but we should not rely on its
- * reordering gurantees. So barrier is needed after this call.
- */
- cpumask_set_cpu(cpu, &waiting_cpus);
-
- barrier();
-
- /*
- * Mark entry to slowpath before doing the pickup test to make
- * sure we don't deadlock with an unlocker.
- */
- __ticket_enter_slowpath(lock);
-
- /* make sure enter_slowpath, which is atomic does not cross the read */
- smp_mb__after_atomic();
-
- /*
- * check again make sure it didn't become free while
- * we weren't looking.
- */
- head = READ_ONCE(lock->tickets.head);
- if (__tickets_equal(head, want)) {
- add_stats(TAKEN_SLOW_PICKUP, 1);
- goto out;
- }
-
- /*
- * halt until it's our turn and kicked. Note that we do safe halt
- * for irq enabled case to avoid hang when lock info is overwritten
- * in irq spinlock slowpath and no spurious interrupt occur to save us.
- */
- if (arch_irqs_disabled_flags(flags))
- halt();
- else
- safe_halt();
-
-out:
- cpumask_clear_cpu(cpu, &waiting_cpus);
- w->lock = NULL;
- local_irq_restore(flags);
- spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
-
-/* Kick vcpu waiting on @lock->head to reach value @ticket */
-static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
-{
- int cpu;
-
- add_stats(RELEASED_SLOW, 1);
- for_each_cpu(cpu, &waiting_cpus) {
- const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
- if (READ_ONCE(w->lock) == lock &&
- READ_ONCE(w->want) == ticket) {
- add_stats(RELEASED_SLOW_KICKED, 1);
- kvm_kick_cpu(cpu);
- break;
- }
- }
-}
-
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
-
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -854,16 +603,11 @@
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
-#ifdef CONFIG_QUEUED_SPINLOCKS
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = kvm_wait;
pv_lock_ops.kick = kvm_kick_cpu;
-#else /* !CONFIG_QUEUED_SPINLOCKS */
- pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
- pv_lock_ops.unlock_kick = kvm_unlock_kick;
-#endif
}
static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3692249..60b9949 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,7 @@
#include <asm/x86_init.h>
#include <asm/reboot.h>
-static int kvmclock = 1;
+static int kvmclock __ro_after_init = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
static cycle_t kvm_sched_clock_offset;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 068c4a9..0f8d204 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -499,6 +499,9 @@
{
struct mpf_intel *mpf = mpf_found;
+ if (!smp_found_config)
+ return;
+
if (!mpf)
return;
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 1939a02..2c55a00 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,7 +8,6 @@
#include <asm/paravirt.h>
-#ifdef CONFIG_QUEUED_SPINLOCKS
__visible void __native_queued_spin_unlock(struct qspinlock *lock)
{
native_queued_spin_unlock(lock);
@@ -21,19 +20,13 @@
return pv_lock_ops.queued_spin_unlock.func ==
__raw_callee_save___native_queued_spin_unlock;
}
-#endif
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
-#ifdef CONFIG_QUEUED_SPINLOCKS
.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
.wait = paravirt_nop,
.kick = paravirt_nop,
-#else /* !CONFIG_QUEUED_SPINLOCKS */
- .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
- .unlock_kick = paravirt_nop,
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1acfd76..bbf3d59 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -332,7 +332,6 @@
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.read_cr4 = native_read_cr4,
- .read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
-struct pv_mmu_ops pv_mmu_ops = {
+struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 158dc06..920c6ae 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -10,7 +10,7 @@
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
#endif
@@ -49,7 +49,7 @@
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index e70087a..bb3840c 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,7 +19,7 @@
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
#endif
@@ -61,7 +61,7 @@
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..4002b47 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -513,6 +514,17 @@
}
/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+ struct inactive_task_frame *frame =
+ (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+ return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
@@ -520,15 +532,18 @@
*/
unsigned long get_wchan(struct task_struct *p)
{
- unsigned long start, bottom, top, sp, fp, ip;
+ unsigned long start, bottom, top, sp, fp, ip, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
+ if (!try_get_task_stack(p))
+ return 0;
+
start = (unsigned long)task_stack_page(p);
if (!start)
- return 0;
+ goto out;
/*
* Layout of the stack page:
@@ -537,9 +552,7 @@
* PADDING
* ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
* stack
- * ----------- bottom = start + sizeof(thread_info)
- * thread_info
- * ----------- start
+ * ----------- bottom = start
*
* The tasks stack pointer points at the location where the
* framepointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@
*/
top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
top -= 2 * sizeof(unsigned long);
- bottom = start + sizeof(struct thread_info);
+ bottom = start;
sp = READ_ONCE(p->thread.sp);
if (sp < bottom || sp > top)
- return 0;
+ goto out;
- fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
do {
if (fp < bottom || fp > top)
- return 0;
+ goto out;
ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
- if (!in_sched_functions(ip))
- return ip;
+ if (!in_sched_functions(ip)) {
+ ret = ip;
+ goto out;
+ }
fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
} while (count++ < 16 && p->state != TASK_RUNNING);
- return 0;
+
+out:
+ put_task_stack(p);
+ return ret;
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..bd7be8e 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((unsigned long *)tsk->thread.sp)[3];
-}
-
void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
- cr4 = __read_cr4_safe();
+ cr4 = __read_cr4();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
@@ -133,35 +122,31 @@
unsigned long arg, struct task_struct *p, unsigned long tls)
{
struct pt_regs *childregs = task_pt_regs(p);
+ struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+ struct inactive_task_frame *frame = &fork_frame->frame;
struct task_struct *tsk;
int err;
- p->thread.sp = (unsigned long) childregs;
+ frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) fork_frame;
p->thread.sp0 = (unsigned long) (childregs+1);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- p->thread.ip = (unsigned long) ret_from_kernel_thread;
- task_user_gs(p) = __KERNEL_STACK_CANARY;
- childregs->ds = __USER_DS;
- childregs->es = __USER_DS;
- childregs->fs = __KERNEL_PERCPU;
- childregs->bx = sp; /* function */
- childregs->bp = arg;
- childregs->orig_ax = -1;
- childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+ frame->bx = sp; /* function */
+ frame->di = arg;
p->thread.io_bitmap_ptr = NULL;
return 0;
}
+ frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
if (sp)
childregs->sp = sp;
- p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());
p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..ee944bd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,8 +49,7 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
-
-asmlinkage extern void ret_from_fork(void);
+#include <asm/vdso.h>
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
@@ -141,12 +140,17 @@
{
int err;
struct pt_regs *childregs;
+ struct fork_frame *fork_frame;
+ struct inactive_task_frame *frame;
struct task_struct *me = current;
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
- p->thread.sp = (unsigned long) childregs;
- set_tsk_thread_flag(p, TIF_FORK);
+ fork_frame = container_of(childregs, struct fork_frame, regs);
+ frame = &fork_frame->frame;
+ frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
@@ -160,15 +164,11 @@
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->sp = (unsigned long)childregs;
- childregs->ss = __KERNEL_DS;
- childregs->bx = sp; /* function */
- childregs->bp = arg;
- childregs->orig_ax = -1;
- childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+ frame->bx = sp; /* function */
+ frame->r12 = arg;
return 0;
}
+ frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
@@ -511,7 +511,7 @@
current->personality &= ~READ_IMPLIES_EXEC;
/* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
- current_thread_info()->status &= ~TS_COMPAT;
+ current->thread.status &= ~TS_COMPAT;
} else {
set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32);
@@ -519,11 +519,24 @@
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
- current_thread_info()->status |= TS_COMPAT;
+ current->thread.status |= TS_COMPAT;
}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
+{
+ int ret;
+
+ ret = map_vdso_once(image, addr);
+ if (ret)
+ return ret;
+
+ return (long)image->size;
+}
+#endif
+
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
@@ -577,6 +590,19 @@
break;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# ifdef CONFIG_X86_X32_ABI
+ case ARCH_MAP_VDSO_X32:
+ return prctl_map_vdso(&vdso_image_x32, addr);
+# endif
+# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ case ARCH_MAP_VDSO_32:
+ return prctl_map_vdso(&vdso_image_32, addr);
+# endif
+ case ARCH_MAP_VDSO_64:
+ return prctl_map_vdso(&vdso_image_64, addr);
+#endif
+
default:
ret = -EINVAL;
break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a..0e63c02 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@
return sp;
prev_esp = (u32 *)(context);
- if (prev_esp)
- return (unsigned long)prev_esp;
+ if (*prev_esp)
+ return (unsigned long)*prev_esp;
return (unsigned long)regs;
}
@@ -934,7 +934,7 @@
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- task_thread_info(child)->status |= TS_I386_REGS_POKED;
+ child->thread.status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@
#ifdef CONFIG_X86_64
-static struct user_regset x86_64_regsets[] __read_mostly = {
+static struct user_regset x86_64_regsets[] __ro_after_init = {
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@
#endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static struct user_regset x86_32_regsets[] __read_mostly = {
+static struct user_regset x86_32_regsets[] __ro_after_init = {
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@
*/
u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
-void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
@@ -1358,7 +1358,7 @@
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
+ if (!user_64bit_mode(task_pt_regs(task)))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
return &user_x86_32_view;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index cc457ff..51402a7 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -626,3 +626,34 @@
amd_disable_seq_and_redirect_scrub);
#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#include <linux/jump_label.h>
+#include <asm/string_64.h>
+
+/* Ivy Bridge, Haswell, Broadwell */
+static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
+{
+ u32 capid0;
+
+ pci_read_config_dword(pdev, 0x84, &capid0);
+
+ if (capid0 & 0x10)
+ static_branch_inc(&mcsafe_key);
+}
+
+/* Skylake */
+static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
+{
+ u32 capid0;
+
+ pci_read_config_dword(pdev, 0x84, &capid0);
+
+ if ((capid0 & 0xc0) == 0xc0)
+ static_branch_inc(&mcsafe_key);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
+#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 63bf27d..e244c19 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -705,7 +705,7 @@
tboot_shutdown(TB_SHUTDOWN_HALT);
}
-struct machine_ops machine_ops = {
+struct machine_ops machine_ops __ro_after_init = {
.power_off = native_machine_power_off,
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 80eab01..2408c16 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -27,8 +27,8 @@
int i;
struct e820entry *entry;
- for (i = 0; i < e820.nr_map; i++) {
- entry = &e820.map[i];
+ for (i = 0; i < e820->nr_map; i++) {
+ entry = &e820->map[i];
resource_clip(avail, entry->addr,
entry->addr + entry->size - 1);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa60f5..bbfbca5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -210,9 +210,9 @@
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-__visible unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features __ro_after_init;
#else
-__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
#endif
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -458,8 +458,8 @@
early_memunmap(data, sizeof(*data));
}
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
+ memcpy(e820_saved, e820, sizeof(struct e820map));
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("reserve setup_data");
}
@@ -763,7 +763,7 @@
*/
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
}
/* called before trim_bios_range() to spare extra sanitize */
@@ -1032,7 +1032,7 @@
if (ppro_with_ram_bug()) {
e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
E820_RESERVED);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
printk(KERN_INFO "fixed physical RAM map:\n");
e820_print_map("bad_ppro");
}
@@ -1096,19 +1096,19 @@
memblock_set_current_limit(ISA_END_ADDRESS);
memblock_x86_fill();
- if (efi_enabled(EFI_BOOT)) {
- efi_fake_memmap();
- efi_find_mirror();
- }
-
reserve_bios_regions();
- /*
- * The EFI specification says that boot service code won't be called
- * after ExitBootServices(). This is, in fact, a lie.
- */
- if (efi_enabled(EFI_MEMMAP))
+ if (efi_enabled(EFI_MEMMAP)) {
+ efi_fake_memmap();
+ efi_find_mirror();
+ efi_esrt_init();
+
+ /*
+ * The EFI specification says that boot service code won't be
+ * called after ExitBootServices(). This is, in fact, a lie.
+ */
efi_reserve_boot_services();
+ }
/* preallocate 4k for mptable mpc */
early_reserve_e820_mpc_new();
@@ -1137,9 +1137,7 @@
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
*/
- if (boot_cpu_data.cpuid_level >= 0)
- /* A CPU has %cr4 if and only if it has CPUID. */
- mmu_cr4_features = __read_cr4();
+ mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@@ -1221,8 +1219,7 @@
/*
* get boot-time SMP configuration:
*/
- if (smp_found_config)
- get_smp_config();
+ get_smp_config();
prefill_possible_map();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7a40e06..2bbd27f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -33,7 +33,7 @@
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
- IRQ_STACK_SIZE - 64;
+ IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 04cb321..763af1d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -42,6 +42,7 @@
#include <asm/syscalls.h>
#include <asm/sigframe.h>
+#include <asm/signal.h>
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
@@ -547,7 +548,7 @@
return -EFAULT;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+ if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
return -EFAULT;
}
@@ -660,20 +661,21 @@
return 0;
}
-static inline int is_ia32_compat_frame(void)
+static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_IA32_EMULATION) &&
- test_thread_flag(TIF_IA32);
+ ksig->ka.sa.sa_flags & SA_IA32_ABI;
}
-static inline int is_ia32_frame(void)
+static inline int is_ia32_frame(struct ksignal *ksig)
{
- return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
+ return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
}
-static inline int is_x32_frame(void)
+static inline int is_x32_frame(struct ksignal *ksig)
{
- return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+ return IS_ENABLED(CONFIG_X86_X32_ABI) &&
+ ksig->ka.sa.sa_flags & SA_X32_ABI;
}
static int
@@ -684,12 +686,12 @@
compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
- if (is_ia32_frame()) {
+ if (is_ia32_frame(ksig)) {
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
return ia32_setup_rt_frame(usig, ksig, cset, regs);
else
return ia32_setup_frame(usig, ksig, cset, regs);
- } else if (is_x32_frame()) {
+ } else if (is_x32_frame(ksig)) {
return x32_setup_rt_frame(ksig, cset, regs);
} else {
return __setup_rt_frame(ksig->sig, ksig, set, regs);
@@ -783,7 +785,7 @@
* than the tracee.
*/
#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index b44564b..40df337 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -1,5 +1,6 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
+#include <linux/ptrace.h>
/*
* The compat_siginfo_t structure and handing code is very easy
@@ -92,10 +93,31 @@
/* any new si_fields should be added here */
}
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
+{
+ /* Don't leak in-kernel non-uapi flags to user-space */
+ if (oact)
+ oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
+
+ if (!act)
+ return;
+
+ /* Don't let flags to be set from userspace */
+ act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
+
+ if (user_64bit_mode(current_pt_regs()))
+ return;
+
+ if (in_ia32_syscall())
+ act->sa.sa_flags |= SA_IA32_ABI;
+ if (in_x32_syscall())
+ act->sa.sa_flags |= SA_X32_ABI;
+}
+
+int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
+ bool x32_ABI)
{
int err = 0;
- bool ia32 = test_thread_flag(TIF_IA32);
signal_compat_build_tests();
@@ -146,7 +168,7 @@
put_user_ex(from->si_arch, &to->si_arch);
break;
case __SI_CHLD >> 16:
- if (ia32) {
+ if (!x32_ABI) {
put_user_ex(from->si_utime, &to->si_utime);
put_user_ex(from->si_stime, &to->si_stime);
} else {
@@ -180,6 +202,12 @@
return err;
}
+/* from syscall's path, where we know the ABI */
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+ return __copy_siginfo_to_user32(to, from, in_x32_syscall());
+}
+
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
int err = 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4296beb..42a9362 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -471,7 +471,7 @@
return false;
}
-static struct sched_domain_topology_level numa_inside_package_topology[] = {
+static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
@@ -480,22 +480,23 @@
#endif
{ NULL, },
};
+
+static struct sched_domain_topology_level x86_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
/*
- * set_sched_topology() sets the topology internal to a CPU. The
- * NUMA topologies are layered on top of it to build the full
- * system topology.
- *
- * If NUMA nodes are observed to occur within a CPU package, this
- * function should be called. It forces the sched domain code to
- * only use the SMT level for the CPU portion of the topology.
- * This essentially falls back to relying on NUMA information
- * from the SRAT table to describe the entire system topology
- * (except for hyperthreads).
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours and Intel Cluster-on-Die have this.
*/
-static void primarily_use_numa_for_topology(void)
-{
- set_sched_topology(numa_inside_package_topology);
-}
+static bool x86_has_numa_in_package;
void set_cpu_sibling_map(int cpu)
{
@@ -558,7 +559,7 @@
c->booted_cores = cpu_data(i).booted_cores;
}
if (match_die(c, o) && !topology_same_node(c, o))
- primarily_use_numa_for_topology();
+ x86_has_numa_in_package = true;
}
threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -690,7 +691,7 @@
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -717,7 +718,7 @@
/*
* Be paranoid about clearing APIC errors.
*/
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
@@ -756,7 +757,7 @@
* Determine this based on the APIC version.
* If we don't have an integrated APIC, don't send the STARTUP IPIs.
*/
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ if (APIC_INTEGRATED(boot_cpu_apic_version))
num_starts = 2;
else
num_starts = 0;
@@ -942,7 +943,6 @@
per_cpu(cpu_current_top_of_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
- clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
}
@@ -969,7 +969,7 @@
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
- stack_start = idle->thread.sp;
+ initial_stack = idle->thread.sp;
/*
* Enable the espfix hack for this CPU
@@ -994,7 +994,7 @@
/*
* Be paranoid about clearing APIC errors.
*/
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
@@ -1115,17 +1115,8 @@
common_cpu_up(cpu, tidle);
- /*
- * We have to walk the irq descriptors to setup the vector
- * space for the cpu which comes online. Prevent irq
- * alloc/free across the bringup.
- */
- irq_lock_sparse();
-
err = do_boot_cpu(apicid, cpu, tidle);
-
if (err) {
- irq_unlock_sparse();
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
return -EIO;
}
@@ -1143,8 +1134,6 @@
touch_nmi_watchdog();
}
- irq_unlock_sparse();
-
return 0;
}
@@ -1249,7 +1238,7 @@
/*
* If we couldn't find a local APIC, then get out of here now!
*/
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+ if (APIC_INTEGRATED(boot_cpu_apic_version) &&
!boot_cpu_has(X86_FEATURE_APIC)) {
if (!disable_apic) {
pr_err("BIOS bug, local APIC #%d not detected!...\n",
@@ -1304,6 +1293,16 @@
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
+
+ /*
+ * Set 'default' x86 topology, this matches default_topology() in that
+ * it has NUMA nodes as a topology level. See also
+ * native_smp_cpus_done().
+ *
+ * Must be done before set_cpus_sibling_map() is ran.
+ */
+ set_sched_topology(x86_topology);
+
set_cpu_sibling_map(0);
switch (smp_sanity_check(max_cpus)) {
@@ -1323,14 +1322,13 @@
break;
}
- default_setup_apic_routing();
-
if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
+ default_setup_apic_routing();
cpu0_logical_apicid = apic_bsp_setup(false);
pr_info("CPU%d: ", 0);
@@ -1370,6 +1368,9 @@
{
pr_debug("Boot done\n");
+ if (x86_has_numa_in_package)
+ set_sched_topology(x86_numa_in_package_topology);
+
nmi_selftest();
impress_friends();
setup_ioapic_dest();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4738f5e..0653788 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -8,80 +8,69 @@
#include <linux/export.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
+#include <asm/unwind.h>
-static int save_stack_stack(void *data, char *name)
+static int save_stack_address(struct stack_trace *trace, unsigned long addr,
+ bool nosched)
{
- return 0;
-}
-
-static int
-__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
-{
- struct stack_trace *trace = data;
-#ifdef CONFIG_FRAME_POINTER
- if (!reliable)
- return 0;
-#endif
if (nosched && in_sched_functions(addr))
return 0;
+
if (trace->skip > 0) {
trace->skip--;
return 0;
}
- if (trace->nr_entries < trace->max_entries) {
- trace->entries[trace->nr_entries++] = addr;
- return 0;
- } else {
- return -1; /* no more room, stop walking the stack */
+
+ if (trace->nr_entries >= trace->max_entries)
+ return -1;
+
+ trace->entries[trace->nr_entries++] = addr;
+ return 0;
+}
+
+static void __save_stack_trace(struct stack_trace *trace,
+ struct task_struct *task, struct pt_regs *regs,
+ bool nosched)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ if (regs)
+ save_stack_address(trace, regs->ip, nosched);
+
+ for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || save_stack_address(trace, addr, nosched))
+ break;
}
+
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
}
-static int save_stack_address(void *data, unsigned long addr, int reliable)
-{
- return __save_stack_address(data, addr, reliable, false);
-}
-
-static int
-save_stack_address_nosched(void *data, unsigned long addr, int reliable)
-{
- return __save_stack_address(data, addr, reliable, true);
-}
-
-static const struct stacktrace_ops save_stack_ops = {
- .stack = save_stack_stack,
- .address = save_stack_address,
- .walk_stack = print_context_stack,
-};
-
-static const struct stacktrace_ops save_stack_ops_nosched = {
- .stack = save_stack_stack,
- .address = save_stack_address_nosched,
- .walk_stack = print_context_stack,
-};
-
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
void save_stack_trace(struct stack_trace *trace)
{
- dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ __save_stack_trace(trace, current, NULL, false);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
- dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ __save_stack_trace(trace, current, regs, false);
}
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ if (!try_get_task_stack(tsk))
+ return;
+
+ __save_stack_trace(trace, tsk, NULL, true);
+
+ put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 654f6c6..8402907 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -188,12 +188,12 @@
tboot->num_mac_regions = 0;
- for (i = 0; i < e820.nr_map; i++) {
- if ((e820.map[i].type != E820_RAM)
- && (e820.map[i].type != E820_RESERVED_KERN))
+ for (i = 0; i < e820->nr_map; i++) {
+ if ((e820->map[i].type != E820_RAM)
+ && (e820->map[i].type != E820_RESERVED_KERN))
continue;
- add_mac_region(e820.map[i].addr, e820.map[i].size);
+ add_mac_region(e820->map[i].addr, e820->map[i].size);
}
tboot->acpi_sinfo.kernel_s3_resume_vector =
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b70ca12..bd4e3d4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
+#ifdef CONFIG_VMAP_STACK
+__visible void __noreturn handle_stack_overflow(const char *message,
+ struct pt_regs *regs,
+ unsigned long fault_address)
+{
+ printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+ (void *)fault_address, current->stack,
+ (char *)current->stack + THREAD_SIZE - 1);
+ die(message, regs, 0);
+
+ /* Be absolutely certain we don't return. */
+ panic(message);
+}
+#endif
+
#ifdef CONFIG_X86_64
/* Runs on IST stack */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
+#ifdef CONFIG_VMAP_STACK
+ unsigned long cr2;
+#endif
#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_DF;
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * If we overflow the stack into a guard page, the CPU will fail
+ * to deliver #PF and will send #DF instead. Similarly, if we
+ * take any non-IST exception while too close to the bottom of
+ * the stack, the processor will get a page fault while
+ * delivering the exception and will generate a double fault.
+ *
+ * According to the SDM (footnote in 6.15 under "Interrupt 14 -
+ * Page-Fault Exception (#PF):
+ *
+ * Processors update CR2 whenever a page fault is detected. If a
+ * second page fault occurs while an earlier page fault is being
+ * deliv- ered, the faulting linear address of the second fault will
+ * overwrite the contents of CR2 (replacing the previous
+ * address). These updates to CR2 occur even if the page fault
+ * results in a double fault or occurs during the delivery of a
+ * double fault.
+ *
+ * The logic below has a small possibility of incorrectly diagnosing
+ * some errors as stack overflows. For example, if the IDT or GDT
+ * gets corrupted such that #GP delivery fails due to a bad descriptor
+ * causing #GP and we hit this condition while CR2 coincidentally
+ * points to the stack guard page, we'll think we overflowed the
+ * stack. Given that we're going to panic one way or another
+ * if this happens, this isn't necessarily worth fixing.
+ *
+ * If necessary, we could improve the test by only diagnosing
+ * a stack overflow if the saved RSP points within 47 bytes of
+ * the bottom of the stack: if RSP == tsk_stack + 48 and we
+ * take an exception, the stack is already aligned and there
+ * will be enough room SS, RSP, RFLAGS, CS, RIP, and a
+ * possible error code, so a stack overflow would *not* double
+ * fault. With any less space left, exception delivery could
+ * fail, and, as a practical matter, we've overflowed the
+ * stack even if the actual trigger for the double fault was
+ * something else.
+ */
+ cr2 = read_cr2();
+ if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
+ handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
+#endif
+
#ifdef CONFIG_DOUBLEFAULT
df_debug(regs, error_code);
#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 78b9cb5..46b2f41 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -23,6 +23,7 @@
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
+#include <asm/intel-family.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -686,11 +687,16 @@
if (crystal_khz == 0) {
switch (boot_cpu_data.x86_model) {
- case 0x4E: /* SKL */
- case 0x5E: /* SKL */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case 0x5C: /* BXT */
+ case INTEL_FAM6_SKYLAKE_X:
+ crystal_khz = 25000; /* 25.0 MHz */
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT:
crystal_khz = 19200; /* 19.2 MHz */
break;
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
new file mode 100644
index 0000000..a2456d4
--- /dev/null
+++ b/arch/x86/kernel/unwind_frame.c
@@ -0,0 +1,93 @@
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+#define FRAME_HEADER_SIZE (sizeof(long) * 2)
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ unsigned long addr;
+ unsigned long *addr_p = unwind_get_return_address_ptr(state);
+
+ if (unwind_done(state))
+ return 0;
+
+ addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+ addr_p);
+
+ return __kernel_text_address(addr) ? addr : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool update_stack_state(struct unwind_state *state, void *addr,
+ size_t len)
+{
+ struct stack_info *info = &state->stack_info;
+
+ /*
+ * If addr isn't on the current stack, switch to the next one.
+ *
+ * We may have to traverse multiple stacks to deal with the possibility
+ * that 'info->next_sp' could point to an empty stack and 'addr' could
+ * be on a subsequent stack.
+ */
+ while (!on_stack(info, addr, len))
+ if (get_stack_info(info->next_sp, state->task, info,
+ &state->stack_mask))
+ return false;
+
+ return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ unsigned long *next_bp;
+
+ if (unwind_done(state))
+ return false;
+
+ next_bp = (unsigned long *)*state->bp;
+
+ /* make sure the next frame's data is accessible */
+ if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+ return false;
+
+ /* move to the next frame */
+ state->bp = next_bp;
+ return true;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame)
+{
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+
+ /* don't even attempt to start from user mode regs */
+ if (regs && user_mode(regs)) {
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return;
+ }
+
+ /* set up the starting stack frame */
+ state->bp = get_frame_pointer(task, regs);
+
+ /* initialize stack info and make sure the frame data is accessible */
+ get_stack_info(state->bp, state->task, &state->stack_info,
+ &state->stack_mask);
+ update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+
+ /*
+ * The caller can provide the address of the first frame directly
+ * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+ * to start unwinding at. Skip ahead until we reach it.
+ */
+ while (!unwind_done(state) &&
+ (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+ state->bp < first_frame))
+ unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
new file mode 100644
index 0000000..b5a834c
--- /dev/null
+++ b/arch/x86/kernel/unwind_guess.c
@@ -0,0 +1,43 @@
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+
+ if (unwind_done(state))
+ return false;
+
+ do {
+ for (state->sp++; state->sp < info->end; state->sp++)
+ if (__kernel_text_address(*state->sp))
+ return true;
+
+ state->sp = info->next_sp;
+
+ } while (!get_stack_info(state->sp, state->task, info,
+ &state->stack_mask));
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame)
+{
+ memset(state, 0, sizeof(*state));
+
+ state->task = task;
+ state->sp = first_frame;
+
+ get_stack_info(first_frame, state->task, &state->stack_info,
+ &state->stack_mask);
+
+ if (!__kernel_text_address(*first_frame))
+ unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 95e49f6..b2cee3d1 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -38,7 +38,7 @@
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 76c5e52..0bd9f12 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,7 +91,7 @@
static void default_nmi_init(void) { };
static int default_i8042_detect(void) { return 1; };
-struct x86_platform_ops x86_platform = {
+struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
@@ -108,7 +108,7 @@
EXPORT_SYMBOL_GPL(x86_platform);
#if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi = {
+struct x86_msi_ops x86_msi __ro_after_init = {
.setup_msi_irqs = native_setup_msi_irqs,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
@@ -137,7 +137,7 @@
}
#endif
-struct x86_io_apic_ops x86_io_apic_ops = {
+struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
.read = native_io_apic_read,
.disable = native_disable_io_apic,
};
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d8..1e6b84b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4961,7 +4961,7 @@
avic_handle_ldr_update(vcpu);
}
-static struct kvm_x86_ops svm_x86_ops = {
+static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40..121fdf6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11177,7 +11177,7 @@
~FEATURE_CONTROL_LMCE;
}
-static struct kvm_x86_ops vmx_x86_ops = {
+static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
.hardware_setup = hardware_setup,
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 2ec0b0abb..49e6eba 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -273,7 +273,7 @@
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)
.section .fixup, "ax"
/* Return -EFAULT for any failure */
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index ba47524..d1c7de0 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -52,21 +52,6 @@
return -ENOENT;
}
-static __init void early_get_boot_cpu_id(void)
-{
- /*
- * need to get the APIC ID of the BSP so can use that to
- * create apicid_to_node in amd_scan_nodes()
- */
-#ifdef CONFIG_X86_MPPARSE
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- early_get_smp_config();
-#endif
-}
-
int __init amd_numa_init(void)
{
u64 start = PFN_PHYS(0);
@@ -180,8 +165,11 @@
cores = 1 << bits;
apicid_base = 0;
- /* get the APIC ID of the BSP early for systems with apicid lifting */
- early_get_boot_cpu_id();
+ /*
+ * get boot-time SMP configuration:
+ */
+ early_get_smp_config();
+
if (boot_cpu_physical_apicid > 0) {
pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
apicid_base = boot_cpu_physical_apicid;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 832b98f..79ae939 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/extable.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index dc80230..1e52512 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -5,7 +5,7 @@
*/
#include <linux/sched.h> /* test_thread_flag(), ... */
#include <linux/kdebug.h> /* oops_begin/end, ... */
-#include <linux/module.h> /* search_exception_table */
+#include <linux/extable.h> /* search_exception_table */
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
@@ -753,6 +753,38 @@
return;
}
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * Stack overflow? During boot, we can fault near the initial
+ * stack in the direct map, but that's not an overflow -- check
+ * that we're in vmalloc space to avoid this.
+ */
+ if (is_vmalloc_addr((void *)address) &&
+ (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
+ address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
+ register void *__sp asm("rsp");
+ unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+ /*
+ * We're likely to be running with very little stack space
+ * left. It's plausible that we'd hit this condition but
+ * double-fault even before we get this far, in which case
+ * we're fine: the double-fault handler will deal with it.
+ *
+ * We don't want to make it all the way into the oops code
+ * and then double-fault, though, because we're likely to
+ * break the console driver and lose most of the stack dump.
+ */
+ asm volatile ("movq %[stack], %%rsp\n\t"
+ "call handle_stack_overflow\n\t"
+ "1: jmp 1b"
+ : "+r" (__sp)
+ : "D" ("kernel stack overflow (page fault)"),
+ "S" (regs), "d" (address),
+ [stack] "rm" (stack));
+ unreachable();
+ }
+#endif
+
/*
* 32-bit:
*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d28a2d7..22af912 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -699,8 +699,10 @@
}
}
-void free_initmem(void)
+void __ref free_initmem(void)
{
+ e820_reallocate_tables();
+
free_init_pages("unused kernel",
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index bda8d5e..ddd2661 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -40,17 +40,26 @@
* You need to add an if/def entry if you introduce a new memory region
* compatible with KASLR. Your entry must be in logical order with memory
* layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
+ * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
* ensure that this order is correct and won't be changed.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-static const unsigned long vaddr_end = VMEMMAP_START;
+
+#if defined(CONFIG_X86_ESPFIX64)
+static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
+#elif defined(CONFIG_EFI)
+static const unsigned long vaddr_end = EFI_VA_START;
+#else
+static const unsigned long vaddr_end = __START_KERNEL_map;
+#endif
/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base = __VMALLOC_BASE;
EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base = __VMEMMAP_BASE;
+EXPORT_SYMBOL(vmemmap_base);
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
@@ -63,6 +72,7 @@
} kaslr_regions[] = {
{ &page_offset_base, 64/* Maximum */ },
{ &vmalloc_base, VMALLOC_SIZE_TB },
+ { &vmemmap_base, 1 },
};
/* Get size in bytes used by the memory region */
@@ -89,6 +99,18 @@
struct rnd_state rand_state;
unsigned long remain_entropy;
+ /*
+ * All these BUILD_BUG_ON checks ensures the memory layout is
+ * consistent with the vaddr_start/vaddr_end variables.
+ */
+ BUILD_BUG_ON(vaddr_start >= vaddr_end);
+ BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+ vaddr_end >= EFI_VA_START);
+ BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
+ config_enabled(CONFIG_EFI)) &&
+ vaddr_end >= __START_KERNEL_map);
+ BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+
if (!kaslr_memory_enabled())
return;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fb68210..3f35b48 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -722,22 +722,19 @@
numa_init(dummy_numa_init);
}
-static __init int find_near_online_node(int node)
+static void __init init_memory_less_node(int nid)
{
- int n, val;
- int min_val = INT_MAX;
- int best_node = -1;
+ unsigned long zones_size[MAX_NR_ZONES] = {0};
+ unsigned long zholes_size[MAX_NR_ZONES] = {0};
- for_each_online_node(n) {
- val = node_distance(node, n);
+ /* Allocate and initialize node data. Memory-less node is now online.*/
+ alloc_node_data(nid);
+ free_area_init_node(nid, zones_size, 0, zholes_size);
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
- }
-
- return best_node;
+ /*
+ * All zonelists will be built later in start_kernel() after per cpu
+ * areas are initialized.
+ */
}
/*
@@ -766,8 +763,10 @@
if (node == NUMA_NO_NODE)
continue;
+
if (!node_online(node))
- node = find_near_online_node(node);
+ init_memory_less_node(node);
+
numa_set_node(cpu, node);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 849dc09..e3353c9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -917,11 +917,11 @@
}
}
-static int populate_pmd(struct cpa_data *cpa,
- unsigned long start, unsigned long end,
- unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+static long populate_pmd(struct cpa_data *cpa,
+ unsigned long start, unsigned long end,
+ unsigned num_pages, pud_t *pud, pgprot_t pgprot)
{
- unsigned int cur_pages = 0;
+ long cur_pages = 0;
pmd_t *pmd;
pgprot_t pmd_pgprot;
@@ -991,12 +991,12 @@
return num_pages;
}
-static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
- pgprot_t pgprot)
+static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+ pgprot_t pgprot)
{
pud_t *pud;
unsigned long end;
- int cur_pages = 0;
+ long cur_pages = 0;
pgprot_t pud_pgprot;
end = start + (cpa->numpages << PAGE_SHIFT);
@@ -1052,7 +1052,7 @@
/* Map trailing leftover */
if (start < end) {
- int tmp;
+ long tmp;
pud = pud_offset(pgd, start);
if (pud_none(*pud))
@@ -1078,7 +1078,7 @@
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
pud_t *pud = NULL; /* shut up gcc */
pgd_t *pgd_entry;
- int ret;
+ long ret;
pgd_entry = cpa->pgd + pgd_index(addr);
@@ -1327,7 +1327,8 @@
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
- int ret, numpages = cpa->numpages;
+ unsigned long numpages = cpa->numpages;
+ int ret;
while (numpages) {
/*
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index de391b7..159b52c 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -254,9 +254,7 @@
struct memtype *rbt_memtype_lookup(u64 addr)
{
- struct memtype *data;
- data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
- return data;
+ return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4dbe656..a7655f6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -77,10 +77,25 @@
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+ * If our current stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+ unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+
+ pgd_t *pgd = next->pgd + stack_pgd_index;
+
+ if (unlikely(pgd_none(*pgd)))
+ set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+ }
+
#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
+
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index cb31a44..a2488b6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,27 +16,7 @@
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
-
-static int backtrace_stack(void *data, char *name)
-{
- /* Yes, we want all stacks */
- return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
- unsigned int *depth = data;
-
- if ((*depth)--)
- oprofile_add_trace(addr);
- return 0;
-}
-
-static struct stacktrace_ops backtrace_ops = {
- .stack = backtrace_stack,
- .address = backtrace_address,
- .walk_stack = print_context_stack,
-};
+#include <asm/unwind.h>
#ifdef CONFIG_COMPAT
static struct stack_frame_ia32 *
@@ -113,10 +93,29 @@
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
if (!user_mode(regs)) {
- unsigned long stack = kernel_stack_pointer(regs);
- if (depth)
- dump_trace(NULL, regs, (unsigned long *)stack, 0,
- &backtrace_ops, &depth);
+ struct unwind_state state;
+ unsigned long addr;
+
+ if (!depth)
+ return;
+
+ oprofile_add_trace(regs->ip);
+
+ if (!--depth)
+ return;
+
+ for (unwind_start(&state, current, regs, NULL);
+ !unwind_done(&state); unwind_next_frame(&state)) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+
+ oprofile_add_trace(addr);
+
+ if (!--depth)
+ break;
+ }
+
return;
}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 9770e55..1d97cea 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -120,9 +120,12 @@
static struct {
unsigned long address;
unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
+} pci_indirect __ro_after_init = {
+ .address = 0,
+ .segment = __KERNEL_CS,
+};
-static int pci_bios_present;
+static int pci_bios_present __ro_after_init;
static int __init check_pcibios(void)
{
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 184842e..3c3c19e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -8,6 +8,7 @@
obj-y += intel/
obj-y += intel-mid/
obj-y += intel-quark/
+obj-y += mellanox/
obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 8ff7b93..d49d3be 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -155,7 +155,7 @@
static const struct x86_cpu_id intel_punit_cpu_ids[] = {
ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng),
+ ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng),
ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
{}
};
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index 6a2f569..6aad870 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -82,21 +82,12 @@
}
bgrt_image_size = bmp_header.size;
- bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+ bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
if (!bgrt_image) {
- pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
- bgrt_image_size);
- return;
- }
-
- image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
- if (!image) {
pr_notice("Ignoring BGRT: failed to map image memory\n");
- kfree(bgrt_image);
bgrt_image = NULL;
return;
}
- memcpy(bgrt_image, image, bgrt_image_size);
- memunmap(image);
+ efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size);
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1fbb408..bf99aa7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -166,13 +166,15 @@
}
e820_add_region(start, size, e820_type);
}
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
}
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
+ struct efi_memory_map_data data;
phys_addr_t pmap;
+ int rv;
if (efi_enabled(EFI_PARAVIRT))
return 0;
@@ -187,11 +189,17 @@
#else
pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif
- efi.memmap.phys_map = pmap;
- efi.memmap.nr_map = e->efi_memmap_size /
- e->efi_memdesc_size;
- efi.memmap.desc_size = e->efi_memdesc_size;
- efi.memmap.desc_version = e->efi_memdesc_version;
+ data.phys_map = pmap;
+ data.size = e->efi_memmap_size;
+ data.desc_size = e->efi_memdesc_size;
+ data.desc_version = e->efi_memdesc_version;
+
+ rv = efi_memmap_init_early(&data);
+ if (rv)
+ return rv;
+
+ if (add_efi_memmap)
+ do_add_efi_memmap();
WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
@@ -218,19 +226,6 @@
}
}
-void __init efi_unmap_memmap(void)
-{
- unsigned long size;
-
- clear_bit(EFI_MEMMAP, &efi.flags);
-
- size = efi.memmap.nr_map * efi.memmap.desc_size;
- if (efi.memmap.map) {
- early_memunmap(efi.memmap.map, size);
- efi.memmap.map = NULL;
- }
-}
-
static int __init efi_systab_init(void *phys)
{
if (efi_enabled(EFI_64BIT)) {
@@ -414,33 +409,6 @@
return 0;
}
-static int __init efi_memmap_init(void)
-{
- unsigned long addr, size;
-
- if (efi_enabled(EFI_PARAVIRT))
- return 0;
-
- /* Map the EFI memory map */
- size = efi.memmap.nr_map * efi.memmap.desc_size;
- addr = (unsigned long)efi.memmap.phys_map;
-
- efi.memmap.map = early_memremap(addr, size);
- if (efi.memmap.map == NULL) {
- pr_err("Could not map the memory map!\n");
- return -ENOMEM;
- }
-
- efi.memmap.map_end = efi.memmap.map + size;
-
- if (add_efi_memmap)
- do_add_efi_memmap();
-
- set_bit(EFI_MEMMAP, &efi.flags);
-
- return 0;
-}
-
void __init efi_init(void)
{
efi_char16_t *c16;
@@ -498,16 +466,14 @@
if (!efi_runtime_supported())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
else {
- if (efi_runtime_disabled() || efi_runtime_init())
+ if (efi_runtime_disabled() || efi_runtime_init()) {
+ efi_memmap_unmap();
return;
+ }
}
- if (efi_memmap_init())
- return;
if (efi_enabled(EFI_DBG))
efi_print_memmap();
-
- efi_esrt_init();
}
void __init efi_late_init(void)
@@ -624,42 +590,6 @@
}
}
-static void __init save_runtime_map(void)
-{
-#ifdef CONFIG_KEXEC_CORE
- unsigned long desc_size;
- efi_memory_desc_t *md;
- void *tmp, *q = NULL;
- int count = 0;
-
- if (efi_enabled(EFI_OLD_MEMMAP))
- return;
-
- desc_size = efi.memmap.desc_size;
-
- for_each_efi_memory_desc(md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
- (md->type == EFI_BOOT_SERVICES_CODE) ||
- (md->type == EFI_BOOT_SERVICES_DATA))
- continue;
- tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
- if (!tmp)
- goto out;
- q = tmp;
-
- memcpy(q + count * desc_size, md, desc_size);
- count++;
- }
-
- efi_runtime_map_setup(q, count, desc_size);
- return;
-
-out:
- kfree(q);
- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
-#endif
-}
-
static void *realloc_pages(void *old_memmap, int old_shift)
{
void *ret;
@@ -745,6 +675,46 @@
return entry;
}
+static bool should_map_region(efi_memory_desc_t *md)
+{
+ /*
+ * Runtime regions always require runtime mappings (obviously).
+ */
+ if (md->attribute & EFI_MEMORY_RUNTIME)
+ return true;
+
+ /*
+ * 32-bit EFI doesn't suffer from the bug that requires us to
+ * reserve boot services regions, and mixed mode support
+ * doesn't exist for 32-bit kernels.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ return false;
+
+ /*
+ * Map all of RAM so that we can access arguments in the 1:1
+ * mapping when making EFI runtime calls.
+ */
+ if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) {
+ if (md->type == EFI_CONVENTIONAL_MEMORY ||
+ md->type == EFI_LOADER_DATA ||
+ md->type == EFI_LOADER_CODE)
+ return true;
+ }
+
+ /*
+ * Map boot services regions as a workaround for buggy
+ * firmware that accesses them even when they shouldn't.
+ *
+ * See efi_{reserve,free}_boot_services().
+ */
+ if (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA)
+ return true;
+
+ return false;
+}
+
/*
* Map the efi memory ranges of the runtime services and update new_mmap with
* virtual addresses.
@@ -761,13 +731,9 @@
p = NULL;
while ((p = efi_map_next_entry(p))) {
md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
-#ifdef CONFIG_X86_64
- if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
-#endif
- continue;
- }
+
+ if (!should_map_region(md))
+ continue;
efi_map_region(md);
get_systab_virt_addr(md);
@@ -803,7 +769,7 @@
* non-native EFI
*/
if (!efi_is_native()) {
- efi_unmap_memmap();
+ efi_memmap_unmap();
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -823,7 +789,18 @@
get_systab_virt_addr(md);
}
- save_runtime_map();
+ /*
+ * Unregister the early EFI memmap from efi_init() and install
+ * the new EFI memory map.
+ */
+ efi_memmap_unmap();
+
+ if (efi_memmap_init_late(efi.memmap.phys_map,
+ efi.memmap.desc_size * efi.memmap.nr_map)) {
+ pr_err("Failed to remap late EFI memory map\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
+ }
BUG_ON(!efi.systab);
@@ -884,6 +861,7 @@
int count = 0, pg_shift = 0;
void *new_memmap = NULL;
efi_status_t status;
+ phys_addr_t pa;
efi.systab = NULL;
@@ -901,11 +879,24 @@
return;
}
- save_runtime_map();
+ pa = __pa(new_memmap);
+
+ /*
+ * Unregister the early EFI memmap from efi_init() and install
+ * the new EFI memory map that we are about to pass to the
+ * firmware via SetVirtualAddressMap().
+ */
+ efi_memmap_unmap();
+
+ if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
+ pr_err("Failed to remap late EFI memory map\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
+ }
BUG_ON(!efi.systab);
- if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
+ if (efi_setup_page_tables(pa, 1 << pg_shift)) {
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -917,14 +908,14 @@
efi.memmap.desc_size * count,
efi.memmap.desc_size,
efi.memmap.desc_version,
- (efi_memory_desc_t *)__pa(new_memmap));
+ (efi_memory_desc_t *)pa);
} else {
status = efi_thunk_set_virtual_address_map(
efi_phys.set_virtual_address_map,
efi.memmap.desc_size * count,
efi.memmap.desc_size,
efi.memmap.desc_version,
- (efi_memory_desc_t *)__pa(new_memmap));
+ (efi_memory_desc_t *)pa);
}
if (status != EFI_SUCCESS) {
@@ -956,15 +947,6 @@
efi_runtime_update_mappings();
efi_dump_pagetable();
- /*
- * We mapped the descriptor array into the EFI pagetable above
- * but we're not unmapping it here because if we're running in
- * EFI mixed mode we need all of memory to be accessible when
- * we pass parameters to the EFI runtime services in the
- * thunking code.
- */
- free_pages((unsigned long)new_memmap, pg_shift);
-
/* clean DUMMY object */
efi_delete_dummy_variable();
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 677e29e..58b0f80 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -85,7 +85,7 @@
early_code_mapping_set_exec(1);
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
- save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+ save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
for (pgd = 0; pgd < n_pgds; pgd++) {
save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
@@ -214,7 +214,6 @@
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
unsigned long pfn, text;
- efi_memory_desc_t *md;
struct page *page;
unsigned npages;
pgd_t *pgd;
@@ -245,28 +244,9 @@
* text and allocate a new stack because we can't rely on the
* stack pointer being < 4GB.
*/
- if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native())
return 0;
- /*
- * Map all of RAM so that we can access arguments in the 1:1
- * mapping when making EFI runtime calls.
- */
- for_each_efi_memory_desc(md) {
- if (md->type != EFI_CONVENTIONAL_MEMORY &&
- md->type != EFI_LOADER_DATA &&
- md->type != EFI_LOADER_CODE)
- continue;
-
- pfn = md->phys_addr >> PAGE_SHIFT;
- npages = md->num_pages;
-
- if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) {
- pr_err("Failed to map 1:1 memory\n");
- return 1;
- }
- }
-
page = alloc_page(GFP_KERNEL|__GFP_DMA32);
if (!page)
panic("Unable to allocate EFI runtime stack < 4GB\n");
@@ -359,6 +339,7 @@
*/
void __init efi_map_region_fixed(efi_memory_desc_t *md)
{
+ __map_region(md, md->phys_addr);
__map_region(md, md->virt_addr);
}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 89d1146..10aca63 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -164,6 +164,75 @@
EXPORT_SYMBOL_GPL(efi_query_variable_store);
/*
+ * The UEFI specification makes it clear that the operating system is
+ * free to do whatever it wants with boot services code after
+ * ExitBootServices() has been called. Ignoring this recommendation a
+ * significant bunch of EFI implementations continue calling into boot
+ * services code (SetVirtualAddressMap). In order to work around such
+ * buggy implementations we reserve boot services region during EFI
+ * init and make sure it stays executable. Then, after
+ * SetVirtualAddressMap(), it is discarded.
+ *
+ * However, some boot services regions contain data that is required
+ * by drivers, so we need to track which memory ranges can never be
+ * freed. This is done by tagging those regions with the
+ * EFI_MEMORY_RUNTIME attribute.
+ *
+ * Any driver that wants to mark a region as reserved must use
+ * efi_mem_reserve() which will insert a new EFI memory descriptor
+ * into efi.memmap (splitting existing regions if necessary) and tag
+ * it with EFI_MEMORY_RUNTIME.
+ */
+void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+{
+ phys_addr_t new_phys, new_size;
+ struct efi_mem_range mr;
+ efi_memory_desc_t md;
+ int num_entries;
+ void *new;
+
+ if (efi_mem_desc_lookup(addr, &md)) {
+ pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
+ return;
+ }
+
+ if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) {
+ pr_err("Region spans EFI memory descriptors, %pa\n", &addr);
+ return;
+ }
+
+ size += addr % EFI_PAGE_SIZE;
+ size = round_up(size, EFI_PAGE_SIZE);
+ addr = round_down(addr, EFI_PAGE_SIZE);
+
+ mr.range.start = addr;
+ mr.range.end = addr + size - 1;
+ mr.attribute = md.attribute | EFI_MEMORY_RUNTIME;
+
+ num_entries = efi_memmap_split_count(&md, &mr.range);
+ num_entries += efi.memmap.nr_map;
+
+ new_size = efi.memmap.desc_size * num_entries;
+
+ new_phys = memblock_alloc(new_size, 0);
+ if (!new_phys) {
+ pr_err("Could not allocate boot services memmap\n");
+ return;
+ }
+
+ new = early_memremap(new_phys, new_size);
+ if (!new) {
+ pr_err("Failed to map new boot services memmap\n");
+ return;
+ }
+
+ efi_memmap_insert(&efi.memmap, new, &mr);
+ early_memunmap(new, new_size);
+
+ efi_memmap_install(new_phys, num_entries);
+}
+
+/*
* Helper function for efi_reserve_boot_services() to figure out if we
* can free regions in efi_free_boot_services().
*
@@ -184,15 +253,6 @@
return true;
}
-/*
- * The UEFI specification makes it clear that the operating system is free to do
- * whatever it wants with boot services code after ExitBootServices() has been
- * called. Ignoring this recommendation a significant bunch of EFI implementations
- * continue calling into boot services code (SetVirtualAddressMap). In order to
- * work around such buggy implementations we reserve boot services region during
- * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it
-* is discarded.
-*/
void __init efi_reserve_boot_services(void)
{
efi_memory_desc_t *md;
@@ -249,7 +309,10 @@
void __init efi_free_boot_services(void)
{
+ phys_addr_t new_phys, new_size;
efi_memory_desc_t *md;
+ int num_entries = 0;
+ void *new, *new_md;
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
@@ -257,12 +320,16 @@
size_t rm_size;
if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
+ md->type != EFI_BOOT_SERVICES_DATA) {
+ num_entries++;
continue;
+ }
/* Do not free, someone else owns it: */
- if (md->attribute & EFI_MEMORY_RUNTIME)
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ num_entries++;
continue;
+ }
/*
* Nasty quirk: if all sub-1MB memory is used for boot
@@ -287,7 +354,41 @@
free_bootmem_late(start, size);
}
- efi_unmap_memmap();
+ new_size = efi.memmap.desc_size * num_entries;
+ new_phys = memblock_alloc(new_size, 0);
+ if (!new_phys) {
+ pr_err("Failed to allocate new EFI memmap\n");
+ return;
+ }
+
+ new = memremap(new_phys, new_size, MEMREMAP_WB);
+ if (!new) {
+ pr_err("Failed to map new EFI memmap\n");
+ return;
+ }
+
+ /*
+ * Build a new EFI memmap that excludes any boot services
+ * regions that are not tagged EFI_MEMORY_RUNTIME, since those
+ * regions have now been freed.
+ */
+ new_md = new;
+ for_each_efi_memory_desc(md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA))
+ continue;
+
+ memcpy(new_md, md, efi.memmap.desc_size);
+ new_md += efi.memmap.desc_size;
+ }
+
+ memunmap(new);
+
+ if (efi_memmap_install(new_phys, num_entries)) {
+ pr_err("Could not install new EFI memmap\n");
+ return;
+ }
}
/*
@@ -365,7 +466,7 @@
*/
if (!efi_runtime_supported()) {
pr_info("Setup done, disabling due to 32/64-bit mismatch\n");
- efi_unmap_memmap();
+ efi_memmap_unmap();
}
/* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index fc135bf..429d08b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -1,5 +1,9 @@
# Family-Level Interface Shim (FLIS)
obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o
+# SDHCI Devices
+obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o
+# WiFi
+obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o
# IPC Devices
obj-y += platform_ipc.o
obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
new file mode 100644
index 0000000..4392c15
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -0,0 +1,95 @@
+/*
+ * platform_bcm43xx.c: bcm43xx platform data initilization file
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/fixed.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+
+#define WLAN_SFI_GPIO_IRQ_NAME "WLAN-interrupt"
+#define WLAN_SFI_GPIO_ENABLE_NAME "WLAN-enable"
+
+#define WLAN_DEV_NAME "0000:00:01.3"
+
+static struct regulator_consumer_supply bcm43xx_vmmc_supply = {
+ .dev_name = WLAN_DEV_NAME,
+ .supply = "vmmc",
+};
+
+static struct regulator_init_data bcm43xx_vmmc_data = {
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
+ .num_consumer_supplies = 1,
+ .consumer_supplies = &bcm43xx_vmmc_supply,
+};
+
+static struct fixed_voltage_config bcm43xx_vmmc = {
+ .supply_name = "bcm43xx-vmmc-regulator",
+ /*
+ * Announce 2.0V here to be compatible with SDIO specification. The
+ * real voltage and signaling are still 1.8V.
+ */
+ .microvolts = 2000000, /* 1.8V */
+ .gpio = -EINVAL,
+ .startup_delay = 250 * 1000, /* 250ms */
+ .enable_high = 1, /* active high */
+ .enabled_at_boot = 0, /* disabled at boot */
+ .init_data = &bcm43xx_vmmc_data,
+};
+
+static struct platform_device bcm43xx_vmmc_regulator = {
+ .name = "reg-fixed-voltage",
+ .id = PLATFORM_DEVID_AUTO,
+ .dev = {
+ .platform_data = &bcm43xx_vmmc,
+ },
+};
+
+static int __init bcm43xx_regulator_register(void)
+{
+ int ret;
+
+ bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+ ret = platform_device_register(&bcm43xx_vmmc_regulator);
+ if (ret) {
+ pr_err("%s: vmmc regulator register failed\n", __func__);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __init *bcm43xx_platform_data(void *info)
+{
+ int ret;
+
+ ret = bcm43xx_regulator_register();
+ if (ret)
+ return NULL;
+
+ pr_info("Using generic wifi platform data\n");
+
+ /* For now it's empty */
+ return NULL;
+}
+
+static const struct devs_id bcm43xx_clk_vmmc_dev_id __initconst = {
+ .name = "bcm43xx_clk_vmmc",
+ .type = SFI_DEV_TYPE_SD,
+ .get_platform_data = &bcm43xx_platform_data,
+};
+
+sfi_device(bcm43xx_clk_vmmc_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
new file mode 100644
index 0000000..00c4a03
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
@@ -0,0 +1,47 @@
+/*
+ * SDHCI platform data initilisation file
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include <linux/mmc/sdhci-pci-data.h>
+
+#include <asm/intel-mid.h>
+
+#define INTEL_MRFLD_SD 2
+#define INTEL_MRFLD_SD_CD_GPIO 77
+
+static struct sdhci_pci_data mrfld_sdhci_pci_data = {
+ .rst_n_gpio = -EINVAL,
+ .cd_gpio = INTEL_MRFLD_SD_CD_GPIO,
+};
+
+static struct sdhci_pci_data *
+mrfld_sdhci_pci_get_data(struct pci_dev *pdev, int slotno)
+{
+ unsigned int func = PCI_FUNC(pdev->devfn);
+
+ if (func == INTEL_MRFLD_SD)
+ return &mrfld_sdhci_pci_data;
+
+ return NULL;
+}
+
+static int __init mrfld_sd_init(void)
+{
+ if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+ return -ENODEV;
+
+ sdhci_pci_get_data = mrfld_sdhci_pci_get_data;
+ return 0;
+}
+arch_initcall(mrfld_sd_init);
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index ce119d2..7850128 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -70,6 +70,11 @@
static void intel_mid_power_off(void)
{
+ /* Shut down South Complex via PWRMU */
+ intel_mid_pwr_power_off();
+
+ /* Only for Tangier, the rest will ignore this command */
+ intel_scu_ipc_simple_command(IPCMSG_COLD_OFF, 1);
};
static void intel_mid_reboot(void)
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index c901a34..5d3b45a 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -44,7 +44,19 @@
/* Bits in PM_CMD */
#define PM_CMD_CMD(x) ((x) << 0)
#define PM_CMD_IOC (1 << 8)
-#define PM_CMD_D3cold (1 << 21)
+#define PM_CMD_CM_NOP (0 << 9)
+#define PM_CMD_CM_IMMEDIATE (1 << 9)
+#define PM_CMD_CM_DELAY (2 << 9)
+#define PM_CMD_CM_TRIGGER (3 << 9)
+
+/* System states */
+#define PM_CMD_SYS_STATE_S5 (5 << 16)
+
+/* Trigger variants */
+#define PM_CMD_CFG_TRIGGER_NC (3 << 19)
+
+/* Message to wait for TRIGGER_NC case */
+#define TRIGGER_NC_MSG_2 (2 << 22)
/* List of commands */
#define CMD_SET_CFG 0x01
@@ -137,7 +149,7 @@
static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd)
{
- writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD);
+ writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs + PM_CMD);
return mid_pwr_wait(pwr);
}
@@ -260,6 +272,20 @@
}
EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
+void intel_mid_pwr_power_off(void)
+{
+ struct mid_pwr *pwr = midpwr;
+ u32 cmd = PM_CMD_SYS_STATE_S5 |
+ PM_CMD_CMD(CMD_SET_CFG) |
+ PM_CMD_CM_TRIGGER |
+ PM_CMD_CFG_TRIGGER_NC |
+ TRIGGER_NC_MSG_2;
+
+ /* Send command to SCU */
+ writel(cmd, pwr->regs + PM_CMD);
+ mid_pwr_wait(pwr);
+}
+
int intel_mid_pwr_get_lss_id(struct pci_dev *pdev)
{
int vndr;
@@ -354,7 +380,7 @@
return 0;
}
-static int mid_set_initial_state(struct mid_pwr *pwr)
+static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states)
{
unsigned int i, j;
int ret;
@@ -379,10 +405,10 @@
* NOTE: The actual device mapping is provided by a platform at run
* time using vendor capability of PCI configuration space.
*/
- mid_pwr_set_state(pwr, 0, 0xffffffff);
- mid_pwr_set_state(pwr, 1, 0xffffffff);
- mid_pwr_set_state(pwr, 2, 0xffffffff);
- mid_pwr_set_state(pwr, 3, 0xffffffff);
+ mid_pwr_set_state(pwr, 0, states[0]);
+ mid_pwr_set_state(pwr, 1, states[1]);
+ mid_pwr_set_state(pwr, 2, states[2]);
+ mid_pwr_set_state(pwr, 3, states[3]);
/* Send command to SCU */
ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG);
@@ -397,13 +423,41 @@
return 0;
}
-static const struct mid_pwr_device_info mid_info = {
- .set_initial_state = mid_set_initial_state,
+static int pnw_set_initial_state(struct mid_pwr *pwr)
+{
+ /* On Penwell SRAM must stay powered on */
+ const u32 states[] = {
+ 0xf00fffff, /* PM_SSC(0) */
+ 0xffffffff, /* PM_SSC(1) */
+ 0xffffffff, /* PM_SSC(2) */
+ 0xffffffff, /* PM_SSC(3) */
+ };
+ return mid_set_initial_state(pwr, states);
+}
+
+static int tng_set_initial_state(struct mid_pwr *pwr)
+{
+ const u32 states[] = {
+ 0xffffffff, /* PM_SSC(0) */
+ 0xffffffff, /* PM_SSC(1) */
+ 0xffffffff, /* PM_SSC(2) */
+ 0xffffffff, /* PM_SSC(3) */
+ };
+ return mid_set_initial_state(pwr, states);
+}
+
+static const struct mid_pwr_device_info pnw_info = {
+ .set_initial_state = pnw_set_initial_state,
};
+static const struct mid_pwr_device_info tng_info = {
+ .set_initial_state = tng_set_initial_state,
+};
+
+/* This table should be in sync with the one in drivers/pci/pci-mid.c */
static const struct pci_device_id mid_pwr_pci_ids[] = {
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info },
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&pnw_info },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info },
{}
};
diff --git a/arch/x86/platform/mellanox/Makefile b/arch/x86/platform/mellanox/Makefile
new file mode 100644
index 0000000..f43c931
--- /dev/null
+++ b/arch/x86/platform/mellanox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c
new file mode 100644
index 0000000..7dcfcca
--- /dev/null
+++ b/arch/x86/platform/mellanox/mlx-platform.c
@@ -0,0 +1,266 @@
+/*
+ * arch/x86/platform/mellanox/mlx-platform.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/device.h>
+#include <linux/dmi.h>
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/i2c-mux-reg.h>
+
+#define MLX_PLAT_DEVICE_NAME "mlxplat"
+
+/* LPC bus IO offsets */
+#define MLXPLAT_CPLD_LPC_I2C_BASE_ADRR 0x2000
+#define MLXPLAT_CPLD_LPC_REG_BASE_ADRR 0x2500
+#define MLXPLAT_CPLD_LPC_IO_RANGE 0x100
+#define MLXPLAT_CPLD_LPC_I2C_CH1_OFF 0xdb
+#define MLXPLAT_CPLD_LPC_I2C_CH2_OFF 0xda
+#define MLXPLAT_CPLD_LPC_PIO_OFFSET 0x10000UL
+#define MLXPLAT_CPLD_LPC_REG1 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
+ MLXPLAT_CPLD_LPC_I2C_CH1_OFF) | \
+ MLXPLAT_CPLD_LPC_PIO_OFFSET)
+#define MLXPLAT_CPLD_LPC_REG2 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
+ MLXPLAT_CPLD_LPC_I2C_CH2_OFF) | \
+ MLXPLAT_CPLD_LPC_PIO_OFFSET)
+
+/* Start channel numbers */
+#define MLXPLAT_CPLD_CH1 2
+#define MLXPLAT_CPLD_CH2 10
+
+/* Number of LPC attached MUX platform devices */
+#define MLXPLAT_CPLD_LPC_MUX_DEVS 2
+
+/* mlxplat_priv - platform private data
+ * @pdev_i2c - i2c controller platform device
+ * @pdev_mux - array of mux platform devices
+ */
+struct mlxplat_priv {
+ struct platform_device *pdev_i2c;
+ struct platform_device *pdev_mux[MLXPLAT_CPLD_LPC_MUX_DEVS];
+};
+
+/* Regions for LPC I2C controller and LPC base register space */
+static const struct resource mlxplat_lpc_resources[] = {
+ [0] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_I2C_BASE_ADRR,
+ MLXPLAT_CPLD_LPC_IO_RANGE,
+ "mlxplat_cpld_lpc_i2c_ctrl", IORESOURCE_IO),
+ [1] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_REG_BASE_ADRR,
+ MLXPLAT_CPLD_LPC_IO_RANGE,
+ "mlxplat_cpld_lpc_regs",
+ IORESOURCE_IO),
+};
+
+/* Platform default channels */
+static const int mlxplat_default_channels[][8] = {
+ {
+ MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
+ MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
+ 5, MLXPLAT_CPLD_CH1 + 6, MLXPLAT_CPLD_CH1 + 7
+ },
+ {
+ MLXPLAT_CPLD_CH2, MLXPLAT_CPLD_CH2 + 1, MLXPLAT_CPLD_CH2 + 2,
+ MLXPLAT_CPLD_CH2 + 3, MLXPLAT_CPLD_CH2 + 4, MLXPLAT_CPLD_CH2 +
+ 5, MLXPLAT_CPLD_CH2 + 6, MLXPLAT_CPLD_CH2 + 7
+ },
+};
+
+/* Platform channels for MSN21xx system family */
+static const int mlxplat_msn21xx_channels[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+/* Platform mux data */
+static struct i2c_mux_reg_platform_data mlxplat_mux_data[] = {
+ {
+ .parent = 1,
+ .base_nr = MLXPLAT_CPLD_CH1,
+ .write_only = 1,
+ .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG1,
+ .reg_size = 1,
+ .idle_in_use = 1,
+ },
+ {
+ .parent = 1,
+ .base_nr = MLXPLAT_CPLD_CH2,
+ .write_only = 1,
+ .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG2,
+ .reg_size = 1,
+ .idle_in_use = 1,
+ },
+
+};
+
+static struct platform_device *mlxplat_dev;
+
+static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ mlxplat_mux_data[i].values = mlxplat_default_channels[i];
+ mlxplat_mux_data[i].n_values =
+ ARRAY_SIZE(mlxplat_default_channels[i]);
+ }
+
+ return 1;
+};
+
+static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
+ mlxplat_mux_data[i].n_values =
+ ARRAY_SIZE(mlxplat_msn21xx_channels);
+ }
+
+ return 1;
+};
+
+static struct dmi_system_id mlxplat_dmi_table[] __initdata = {
+ {
+ .callback = mlxplat_dmi_default_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSN24"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_default_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSN27"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_default_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSB"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_default_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSX"),
+ },
+ },
+ {
+ .callback = mlxplat_dmi_msn21xx_matched,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MSN21"),
+ },
+ },
+ { }
+};
+
+static int __init mlxplat_init(void)
+{
+ struct mlxplat_priv *priv;
+ int i, err;
+
+ if (!dmi_check_system(mlxplat_dmi_table))
+ return -ENODEV;
+
+ mlxplat_dev = platform_device_register_simple(MLX_PLAT_DEVICE_NAME, -1,
+ mlxplat_lpc_resources,
+ ARRAY_SIZE(mlxplat_lpc_resources));
+
+ if (IS_ERR(mlxplat_dev))
+ return PTR_ERR(mlxplat_dev);
+
+ priv = devm_kzalloc(&mlxplat_dev->dev, sizeof(struct mlxplat_priv),
+ GFP_KERNEL);
+ if (!priv) {
+ err = -ENOMEM;
+ goto fail_alloc;
+ }
+ platform_set_drvdata(mlxplat_dev, priv);
+
+ priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
+ NULL, 0);
+ if (IS_ERR(priv->pdev_i2c)) {
+ err = PTR_ERR(priv->pdev_i2c);
+ goto fail_alloc;
+ };
+
+ for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+ priv->pdev_mux[i] = platform_device_register_resndata(
+ &mlxplat_dev->dev,
+ "i2c-mux-reg", i, NULL,
+ 0, &mlxplat_mux_data[i],
+ sizeof(mlxplat_mux_data[i]));
+ if (IS_ERR(priv->pdev_mux[i])) {
+ err = PTR_ERR(priv->pdev_mux[i]);
+ goto fail_platform_mux_register;
+ }
+ }
+
+ return 0;
+
+fail_platform_mux_register:
+ for (i--; i > 0 ; i--)
+ platform_device_unregister(priv->pdev_mux[i]);
+ platform_device_unregister(priv->pdev_i2c);
+fail_alloc:
+ platform_device_unregister(mlxplat_dev);
+
+ return err;
+}
+module_init(mlxplat_init);
+
+static void __exit mlxplat_exit(void)
+{
+ struct mlxplat_priv *priv = platform_get_drvdata(mlxplat_dev);
+ int i;
+
+ for (i = ARRAY_SIZE(mlxplat_mux_data) - 1; i >= 0 ; i--)
+ platform_device_unregister(priv->pdev_mux[i]);
+
+ platform_device_unregister(priv->pdev_i2c);
+ platform_device_unregister(mlxplat_dev);
+}
+module_exit(mlxplat_exit);
+
+MODULE_AUTHOR("Vadim Pasternak (vadimp@mellanox.com)");
+MODULE_DESCRIPTION("Mellanox platform driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN24*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN27*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSB*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSX*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN21*:");
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 23f2f3e..b4d5e95 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -149,11 +149,8 @@
s64
uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
{
- s64 ret;
-
- ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
- (u64)addr, buf, (u64)len, 0);
- return ret;
+ return uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+ (u64)addr, buf, (u64)len, 0);
}
EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index fdb4d42..9e42842 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -24,6 +24,29 @@
#include <asm/irq_vectors.h>
#include <asm/timer.h>
+static struct bau_operations ops;
+
+static struct bau_operations uv123_bau_ops = {
+ .bau_gpa_to_offset = uv_gpa_to_offset,
+ .read_l_sw_ack = read_mmr_sw_ack,
+ .read_g_sw_ack = read_gmmr_sw_ack,
+ .write_l_sw_ack = write_mmr_sw_ack,
+ .write_g_sw_ack = write_gmmr_sw_ack,
+ .write_payload_first = write_mmr_payload_first,
+ .write_payload_last = write_mmr_payload_last,
+};
+
+static struct bau_operations uv4_bau_ops = {
+ .bau_gpa_to_offset = uv_gpa_to_soc_phys_ram,
+ .read_l_sw_ack = read_mmr_proc_sw_ack,
+ .read_g_sw_ack = read_gmmr_proc_sw_ack,
+ .write_l_sw_ack = write_mmr_proc_sw_ack,
+ .write_g_sw_ack = write_gmmr_proc_sw_ack,
+ .write_payload_first = write_mmr_proc_payload_first,
+ .write_payload_last = write_mmr_proc_payload_last,
+};
+
+
/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static int timeout_base_ns[] = {
20,
@@ -55,16 +78,16 @@
static int disabled_period = DISABLED_PERIOD;
static struct tunables tunables[] = {
- {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
- {&plugged_delay, PLUGGED_DELAY},
- {&plugsb4reset, PLUGSB4RESET},
- {&timeoutsb4reset, TIMEOUTSB4RESET},
- {&ipi_reset_limit, IPI_RESET_LIMIT},
- {&complete_threshold, COMPLETE_THRESHOLD},
- {&congested_respns_us, CONGESTED_RESPONSE_US},
- {&congested_reps, CONGESTED_REPS},
- {&disabled_period, DISABLED_PERIOD},
- {&giveup_limit, GIVEUP_LIMIT}
+ {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
+ {&plugged_delay, PLUGGED_DELAY},
+ {&plugsb4reset, PLUGSB4RESET},
+ {&timeoutsb4reset, TIMEOUTSB4RESET},
+ {&ipi_reset_limit, IPI_RESET_LIMIT},
+ {&complete_threshold, COMPLETE_THRESHOLD},
+ {&congested_respns_us, CONGESTED_RESPONSE_US},
+ {&congested_reps, CONGESTED_REPS},
+ {&disabled_period, DISABLED_PERIOD},
+ {&giveup_limit, GIVEUP_LIMIT}
};
static struct dentry *tunables_dir;
@@ -216,7 +239,7 @@
msg = mdp->msg;
if (!msg->canceled && do_acknowledge) {
dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
- write_mmr_sw_ack(dw);
+ ops.write_l_sw_ack(dw);
}
msg->replied_to = 1;
msg->swack_vec = 0;
@@ -252,7 +275,7 @@
msg->swack_vec) == 0) &&
(msg2->sending_cpu == msg->sending_cpu) &&
(msg2->msg_type != MSG_NOOP)) {
- mmr = read_mmr_sw_ack();
+ mmr = ops.read_l_sw_ack();
msg_res = msg2->swack_vec;
/*
* This is a message retry; clear the resources held
@@ -270,7 +293,7 @@
stat->d_canceled++;
cancel_count++;
mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
- write_mmr_sw_ack(mr);
+ ops.write_l_sw_ack(mr);
}
}
}
@@ -403,12 +426,12 @@
/*
* only reset the resource if it is still pending
*/
- mmr = read_mmr_sw_ack();
+ mmr = ops.read_l_sw_ack();
msg_res = msg->swack_vec;
mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
if (mmr & msg_res) {
stat->d_rcanceled++;
- write_mmr_sw_ack(mr);
+ ops.write_l_sw_ack(mr);
}
}
}
@@ -580,11 +603,7 @@
*/
static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
{
- unsigned long descriptor_status;
-
- descriptor_status =
- ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
- return descriptor_status;
+ return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
}
/*
@@ -1202,7 +1221,7 @@
struct bau_pq_entry *msg = mdp->msg;
struct bau_pq_entry *other_msg;
- mmr_image = read_mmr_sw_ack();
+ mmr_image = ops.read_l_sw_ack();
swack_vec = msg->swack_vec;
if ((swack_vec & mmr_image) == 0) {
@@ -1431,7 +1450,7 @@
/* destination side statistics */
seq_printf(file,
"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
- read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
+ ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)),
stat->d_requestee, cycles_2_us(stat->d_time),
stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
stat->d_nomsg, stat->d_retries, stat->d_canceled,
@@ -1497,16 +1516,16 @@
}
if (kstrtol(optstr, 10, &input_arg) < 0) {
- printk(KERN_DEBUG "%s is invalid\n", optstr);
+ pr_debug("%s is invalid\n", optstr);
return -EINVAL;
}
if (input_arg == 0) {
elements = ARRAY_SIZE(stat_description);
- printk(KERN_DEBUG "# cpu: cpu number\n");
- printk(KERN_DEBUG "Sender statistics:\n");
+ pr_debug("# cpu: cpu number\n");
+ pr_debug("Sender statistics:\n");
for (i = 0; i < elements; i++)
- printk(KERN_DEBUG "%s\n", stat_description[i]);
+ pr_debug("%s\n", stat_description[i]);
} else if (input_arg == -1) {
for_each_present_cpu(cpu) {
stat = &per_cpu(ptcstats, cpu);
@@ -1554,7 +1573,7 @@
break;
}
if (cnt != e) {
- printk(KERN_INFO "bau tunable error: should be %d values\n", e);
+ pr_info("bau tunable error: should be %d values\n", e);
return -EINVAL;
}
@@ -1571,7 +1590,7 @@
continue;
}
if (val < 1 || val > bcp->cpus_in_uvhub) {
- printk(KERN_DEBUG
+ pr_debug(
"Error: BAU max concurrent %d is invalid\n",
val);
return -EINVAL;
@@ -1619,17 +1638,17 @@
for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu);
- bcp->max_concurr = max_concurr;
- bcp->max_concurr_const = max_concurr;
- bcp->plugged_delay = plugged_delay;
- bcp->plugsb4reset = plugsb4reset;
- bcp->timeoutsb4reset = timeoutsb4reset;
- bcp->ipi_reset_limit = ipi_reset_limit;
- bcp->complete_threshold = complete_threshold;
- bcp->cong_response_us = congested_respns_us;
- bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
+ bcp->max_concurr = max_concurr;
+ bcp->max_concurr_const = max_concurr;
+ bcp->plugged_delay = plugged_delay;
+ bcp->plugsb4reset = plugsb4reset;
+ bcp->timeoutsb4reset = timeoutsb4reset;
+ bcp->ipi_reset_limit = ipi_reset_limit;
+ bcp->complete_threshold = complete_threshold;
+ bcp->cong_response_us = congested_respns_us;
+ bcp->cong_reps = congested_reps;
+ bcp->disabled_period = sec_2_cycles(disabled_period);
+ bcp->giveup_limit = giveup_limit;
}
return count;
}
@@ -1676,21 +1695,21 @@
proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
&proc_uv_ptc_operations);
if (!proc_uv_ptc) {
- printk(KERN_ERR "unable to create %s proc entry\n",
+ pr_err("unable to create %s proc entry\n",
UV_PTC_BASENAME);
return -EINVAL;
}
tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
if (!tunables_dir) {
- printk(KERN_ERR "unable to create debugfs directory %s\n",
+ pr_err("unable to create debugfs directory %s\n",
UV_BAU_TUNABLES_DIR);
return -EINVAL;
}
tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
tunables_dir, NULL, &tunables_fops);
if (!tunables_file) {
- printk(KERN_ERR "unable to create debugfs file %s\n",
+ pr_err("unable to create debugfs file %s\n",
UV_BAU_TUNABLES_FILE);
return -EINVAL;
}
@@ -1725,7 +1744,7 @@
gpa = uv_gpa(bau_desc);
n = uv_gpa_to_gnode(gpa);
- m = uv_gpa_to_offset(gpa);
+ m = ops.bau_gpa_to_offset(gpa);
if (is_uv1_hub())
uv1 = 1;
@@ -1740,7 +1759,7 @@
memset(bd2, 0, sizeof(struct bau_desc));
if (uv1) {
uv1_hdr = &bd2->header.uv1_hdr;
- uv1_hdr->swack_flag = 1;
+ uv1_hdr->swack_flag = 1;
/*
* The base_dest_nasid set in the message header
* is the nasid of the first uvhub in the partition.
@@ -1749,10 +1768,10 @@
* if nasid striding is being used.
*/
uv1_hdr->base_dest_nasid =
- UV_PNODE_TO_NASID(base_pnode);
- uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv1_hdr->command = UV_NET_ENDPOINT_INTD;
- uv1_hdr->int_both = 1;
+ UV_PNODE_TO_NASID(base_pnode);
+ uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv1_hdr->command = UV_NET_ENDPOINT_INTD;
+ uv1_hdr->int_both = 1;
/*
* all others need to be set to zero:
* fairness chaining multilevel count replied_to
@@ -1763,11 +1782,11 @@
* uses native mode for selective broadcasts.
*/
uv2_3_hdr = &bd2->header.uv2_3_hdr;
- uv2_3_hdr->swack_flag = 1;
+ uv2_3_hdr->swack_flag = 1;
uv2_3_hdr->base_dest_nasid =
- UV_PNODE_TO_NASID(base_pnode);
- uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv2_3_hdr->command = UV_NET_ENDPOINT_INTD;
+ UV_PNODE_TO_NASID(base_pnode);
+ uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv2_3_hdr->command = UV_NET_ENDPOINT_INTD;
}
}
for_each_present_cpu(cpu) {
@@ -1790,10 +1809,7 @@
size_t plsize;
char *cp;
void *vp;
- unsigned long pn;
- unsigned long first;
- unsigned long pn_first;
- unsigned long last;
+ unsigned long gnode, first, last, tail;
struct bau_pq_entry *pqp;
struct bau_control *bcp;
@@ -1814,17 +1830,25 @@
bcp->bau_msg_head = pqp;
bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
}
+
+ first = ops.bau_gpa_to_offset(uv_gpa(pqp));
+ last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1)));
+
/*
- * need the gnode of where the memory was really allocated
+ * Pre UV4, the gnode is required to locate the payload queue
+ * and the payload queue tail must be maintained by the kernel.
*/
- pn = uv_gpa_to_gnode(uv_gpa(pqp));
- first = uv_physnodeaddr(pqp);
- pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
- last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
- write_mmr_payload_first(pnode, pn_first);
- write_mmr_payload_tail(pnode, first);
- write_mmr_payload_last(pnode, last);
- write_gmmr_sw_ack(pnode, 0xffffUL);
+ bcp = &per_cpu(bau_control, smp_processor_id());
+ if (bcp->uvhub_version <= 3) {
+ tail = first;
+ gnode = uv_gpa_to_gnode(uv_gpa(pqp));
+ first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail;
+ write_mmr_payload_tail(pnode, tail);
+ }
+
+ ops.write_payload_first(pnode, first);
+ ops.write_payload_last(pnode, last);
+ ops.write_g_sw_ack(pnode, 0xffffUL);
/* in effect, all msg_type's are set to MSG_NOOP */
memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1914,8 +1938,8 @@
bcp->complete_threshold = complete_threshold;
bcp->cong_response_us = congested_respns_us;
bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
+ bcp->disabled_period = sec_2_cycles(disabled_period);
+ bcp->giveup_limit = giveup_limit;
spin_lock_init(&bcp->queue_lock);
spin_lock_init(&bcp->uvhub_lock);
spin_lock_init(&bcp->disable_lock);
@@ -1944,7 +1968,7 @@
pnode = uv_cpu_hub_info(cpu)->pnode;
if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
- printk(KERN_EMERG
+ pr_emerg(
"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
return 1;
@@ -1969,7 +1993,7 @@
sdp->cpu_number[sdp->num_cpus] = cpu;
sdp->num_cpus++;
if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
- printk(KERN_EMERG "%d cpus per socket invalid\n",
+ pr_emerg("%d cpus per socket invalid\n",
sdp->num_cpus);
return 1;
}
@@ -2035,15 +2059,17 @@
bcp->uvhub_version = 2;
else if (is_uv3_hub())
bcp->uvhub_version = 3;
+ else if (is_uv4_hub())
+ bcp->uvhub_version = 4;
else {
- printk(KERN_EMERG "uvhub version not 1, 2 or 3\n");
+ pr_emerg("uvhub version not 1, 2, 3, or 4\n");
return 1;
}
bcp->uvhub_master = *hmasterp;
bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu);
if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
- printk(KERN_EMERG "%d cpus per uvhub invalid\n",
+ pr_emerg("%d cpus per uvhub invalid\n",
bcp->uvhub_cpu);
return 1;
}
@@ -2098,7 +2124,8 @@
void *vp;
struct uvhub_desc *uvhub_descs;
- timeout_us = calculate_destination_timeout();
+ if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
+ timeout_us = calculate_destination_timeout();
vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
uvhub_descs = (struct uvhub_desc *)vp;
@@ -2138,6 +2165,15 @@
if (!is_uv_system())
return 0;
+ if (is_uv4_hub())
+ ops = uv4_bau_ops;
+ else if (is_uv3_hub())
+ ops = uv123_bau_ops;
+ else if (is_uv2_hub())
+ ops = uv123_bau_ops;
+ else if (is_uv1_hub())
+ ops = uv123_bau_ops;
+
for_each_possible_cpu(cur_cpu) {
mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
@@ -2153,7 +2189,9 @@
uv_base_pnode = uv_blade_to_pnode(uvhub);
}
- enable_timeouts();
+ /* software timeouts are not supported on UV4 */
+ if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
+ enable_timeouts();
if (init_per_cpu(nuvhubs, uv_base_pnode)) {
set_bau_off();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b12c26e..53cace2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -130,7 +130,7 @@
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
- ctxt->cr4 = __read_cr4_safe();
+ ctxt->cr4 = __read_cr4();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
#endif
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 1104515..1ac7647 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -68,6 +68,7 @@
MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
+MCE_INJECT_SET(synd);
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
@@ -81,10 +82,12 @@
MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
+MCE_INJECT_GET(synd);
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
/*
* Caller needs to be make sure this cpu doesn't disappear
@@ -243,27 +246,27 @@
static void prepare_msrs(void *info)
{
- struct mce i_mce = *(struct mce *)info;
- u8 b = i_mce.bank;
+ struct mce m = *(struct mce *)info;
+ u8 b = m.bank;
- wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
+ wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (i_mce.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
+ if (m.inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
} else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
} else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
+ wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
}
-
}
static void do_inject(void)
@@ -275,6 +278,9 @@
if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV;
+ if (i_mce.synd)
+ i_mce.status |= MCI_STATUS_SYNDV;
+
if (inj_type == SW_INJ) {
mce_inject_log(&i_mce);
return;
@@ -301,7 +307,9 @@
* only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
* Fam10h and later BKDGs.
*/
- if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+ if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+ b == 4 &&
+ boot_cpu_data.x86 < 0x17) {
toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
cpu = get_nbc_for_node(amd_get_nb_id(cpu));
}
@@ -371,6 +379,9 @@
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
+"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
+"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
+"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
@@ -420,6 +431,7 @@
{ .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
@@ -428,7 +440,7 @@
static int __init init_mce_inject(void)
{
- int i;
+ unsigned int i;
u64 cap;
rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -452,26 +464,22 @@
return 0;
err_dfs_add:
- while (--i >= 0)
+ while (i-- > 0)
debugfs_remove(dfs_fls[i].d);
debugfs_remove(dfs_inj);
dfs_inj = NULL;
- return -ENOMEM;
+ return -ENODEV;
}
static void __exit exit_mce_inject(void)
{
- int i;
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
- debugfs_remove(dfs_fls[i].d);
+ debugfs_remove_recursive(dfs_inj);
+ dfs_inj = NULL;
memset(&dfs_fls, 0, sizeof(dfs_fls));
-
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
}
module_init(init_mce_inject);
module_exit(exit_mce_inject);
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index a7ef7b1..5766ead 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -194,7 +194,7 @@
static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
- int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int err, n, cpu = task_cpu(child);
struct user_i387_struct fpregs;
err = save_i387_registers(userspace_pid[cpu],
@@ -211,7 +211,7 @@
static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
- int n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int n, cpu = task_cpu(child);
struct user_i387_struct fpregs;
n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@@ -224,7 +224,7 @@
static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
- int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int err, n, cpu = task_cpu(child);
struct user_fxsr_struct fpregs;
err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@@ -240,7 +240,7 @@
static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
- int n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int n, cpu = task_cpu(child);
struct user_fxsr_struct fpregs;
n = copy_from_user(&fpregs, buf, sizeof(fpregs));
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..f1d2182 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1237,7 +1237,6 @@
.write_cr0 = xen_write_cr0,
.read_cr4 = native_read_cr4,
- .read_cr4_safe = native_read_cr4_safe,
.write_cr4 = xen_write_cr4,
#ifdef CONFIG_X86_64
@@ -1925,6 +1924,45 @@
}
}
+static void xen_pin_vcpu(int cpu)
+{
+ static bool disable_pinning;
+ struct sched_pin_override pin_override;
+ int ret;
+
+ if (disable_pinning)
+ return;
+
+ pin_override.pcpu = cpu;
+ ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override);
+
+ /* Ignore errors when removing override. */
+ if (cpu < 0)
+ return;
+
+ switch (ret) {
+ case -ENOSYS:
+ pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n",
+ cpu);
+ disable_pinning = true;
+ break;
+ case -EPERM:
+ WARN(1, "Trying to pin vcpu without having privilege to do so\n");
+ disable_pinning = true;
+ break;
+ case -EINVAL:
+ case -EBUSY:
+ pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n",
+ cpu);
+ break;
+ case 0:
+ break;
+ default:
+ WARN(1, "rc %d while trying to pin vcpu\n", ret);
+ disable_pinning = true;
+ }
+}
+
const struct hypervisor_x86 x86_hyper_xen = {
.name = "Xen",
.detect = xen_platform,
@@ -1933,6 +1971,7 @@
#endif
.x2apic_available = xen_x2apic_para_available,
.set_cpu_features = xen_set_cpu_features,
+ .pin_vcpu = xen_pin_vcpu,
};
EXPORT_SYMBOL(x86_hyper_xen);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1764252..f8960fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -861,7 +861,7 @@
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
/*
* Check whether the kernel itself conflicts with the target E820 map.
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f42e78d..3d6e006 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -21,8 +21,6 @@
static DEFINE_PER_CPU(char *, irq_name);
static bool xen_pvspin = true;
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
#include <asm/qspinlock.h>
static void xen_qlock_kick(int cpu)
@@ -71,207 +69,6 @@
xen_poll_irq(irq);
}
-#else /* CONFIG_QUEUED_SPINLOCKS */
-
-enum xen_contention_stat {
- TAKEN_SLOW,
- TAKEN_SLOW_PICKUP,
- TAKEN_SLOW_SPURIOUS,
- RELEASED_SLOW,
- RELEASED_SLOW_KICKED,
- NR_CONTENTION_STATS
-};
-
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#define HISTO_BUCKETS 30
-static struct xen_spinlock_stats
-{
- u32 contention_stats[NR_CONTENTION_STATS];
- u32 histo_spin_blocked[HISTO_BUCKETS+1];
- u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
- u8 ret;
- u8 old = READ_ONCE(zero_stats);
- if (unlikely(old)) {
- ret = cmpxchg(&zero_stats, old, 0);
- /* This ensures only one fellow resets the stat */
- if (ret == old)
- memset(&spinlock_stats, 0, sizeof(spinlock_stats));
- }
-}
-
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
- check_zero();
- spinlock_stats.contention_stats[var] += val;
-}
-
-static inline u64 spin_time_start(void)
-{
- return xen_clocksource_read();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
- unsigned index = ilog2(delta);
-
- check_zero();
-
- if (index < HISTO_BUCKETS)
- array[index]++;
- else
- array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
- u32 delta = xen_clocksource_read() - start;
-
- __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
- spinlock_stats.time_blocked += delta;
-}
-#else /* !CONFIG_XEN_DEBUG_FS */
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
- return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif /* CONFIG_XEN_DEBUG_FS */
-
-struct xen_lock_waiting {
- struct arch_spinlock *lock;
- __ticket_t want;
-};
-
-static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
-static cpumask_t waiting_cpus;
-
-__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
- int irq = __this_cpu_read(lock_kicker_irq);
- struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
- int cpu = smp_processor_id();
- u64 start;
- __ticket_t head;
- unsigned long flags;
-
- /* If kicker interrupts not initialized yet, just spin */
- if (irq == -1)
- return;
-
- start = spin_time_start();
-
- /*
- * Make sure an interrupt handler can't upset things in a
- * partially setup state.
- */
- local_irq_save(flags);
- /*
- * We don't really care if we're overwriting some other
- * (lock,want) pair, as that would mean that we're currently
- * in an interrupt context, and the outer context had
- * interrupts enabled. That has already kicked the VCPU out
- * of xen_poll_irq(), so it will just return spuriously and
- * retry with newly setup (lock,want).
- *
- * The ordering protocol on this is that the "lock" pointer
- * may only be set non-NULL if the "want" ticket is correct.
- * If we're updating "want", we must first clear "lock".
- */
- w->lock = NULL;
- smp_wmb();
- w->want = want;
- smp_wmb();
- w->lock = lock;
-
- /* This uses set_bit, which atomic and therefore a barrier */
- cpumask_set_cpu(cpu, &waiting_cpus);
- add_stats(TAKEN_SLOW, 1);
-
- /* clear pending */
- xen_clear_irq_pending(irq);
-
- /* Only check lock once pending cleared */
- barrier();
-
- /*
- * Mark entry to slowpath before doing the pickup test to make
- * sure we don't deadlock with an unlocker.
- */
- __ticket_enter_slowpath(lock);
-
- /* make sure enter_slowpath, which is atomic does not cross the read */
- smp_mb__after_atomic();
-
- /*
- * check again make sure it didn't become free while
- * we weren't looking
- */
- head = READ_ONCE(lock->tickets.head);
- if (__tickets_equal(head, want)) {
- add_stats(TAKEN_SLOW_PICKUP, 1);
- goto out;
- }
-
- /* Allow interrupts while blocked */
- local_irq_restore(flags);
-
- /*
- * If an interrupt happens here, it will leave the wakeup irq
- * pending, which will cause xen_poll_irq() to return
- * immediately.
- */
-
- /* Block until irq becomes pending (or perhaps a spurious wakeup) */
- xen_poll_irq(irq);
- add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
-
- local_irq_save(flags);
-
- kstat_incr_irq_this_cpu(irq);
-out:
- cpumask_clear_cpu(cpu, &waiting_cpus);
- w->lock = NULL;
-
- local_irq_restore(flags);
-
- spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
-
-static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
-{
- int cpu;
-
- add_stats(RELEASED_SLOW, 1);
-
- for_each_cpu(cpu, &waiting_cpus) {
- const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
-
- /* Make sure we read lock before want */
- if (READ_ONCE(w->lock) == lock &&
- READ_ONCE(w->want) == next) {
- add_stats(RELEASED_SLOW_KICKED, 1);
- xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
- break;
- }
- }
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
BUG();
@@ -334,16 +131,12 @@
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
-#ifdef CONFIG_QUEUED_SPINLOCKS
+
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = xen_qlock_wait;
pv_lock_ops.kick = xen_qlock_kick;
-#else
- pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
- pv_lock_ops.unlock_kick = xen_unlock_kick;
-#endif
}
/*
@@ -372,44 +165,3 @@
}
early_param("xen_nopvspin", xen_parse_nopvspin);
-#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
-
-static struct dentry *d_spin_debug;
-
-static int __init xen_spinlock_debugfs(void)
-{
- struct dentry *d_xen = xen_init_debugfs();
-
- if (d_xen == NULL)
- return -ENOMEM;
-
- if (!xen_pvspin)
- return 0;
-
- d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
-
- debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
- debugfs_create_u32("taken_slow", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[TAKEN_SLOW]);
- debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
- debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
-
- debugfs_create_u32("released_slow", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[RELEASED_SLOW]);
- debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
- &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
- debugfs_create_u64("time_blocked", 0444, d_spin_debug,
- &spinlock_stats.time_blocked);
-
- debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
- spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
- return 0;
-}
-fs_initcall(xen_spinlock_debugfs);
-
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 13f5a6c..c207fa9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -296,17 +296,29 @@
if (ret)
return ERR_PTR(ret);
+ /*
+ * Check if the hardware context is actually mapped to anything.
+ * If not tell the caller that it should skip this queue.
+ */
hctx = q->queue_hw_ctx[hctx_idx];
+ if (!blk_mq_hw_queue_mapped(hctx)) {
+ ret = -EXDEV;
+ goto out_queue_exit;
+ }
ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
if (!rq) {
- blk_queue_exit(q);
- return ERR_PTR(-EWOULDBLOCK);
+ ret = -EWOULDBLOCK;
+ goto out_queue_exit;
}
return rq;
+
+out_queue_exit:
+ blk_queue_exit(q);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 53b1737..96631e6 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -78,30 +78,21 @@
}
#endif
-static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int blk_softirq_cpu_dead(unsigned int cpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_done, cpu),
+ this_cpu_ptr(&blk_cpu_done));
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ local_irq_enable();
- local_irq_disable();
- list_splice_init(&per_cpu(blk_cpu_done, cpu),
- this_cpu_ptr(&blk_cpu_done));
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
- local_irq_enable();
- }
-
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block blk_cpu_notifier = {
- .notifier_call = blk_cpu_notify,
-};
-
void __blk_complete_request(struct request *req)
{
int ccpu, cpu;
@@ -180,7 +171,9 @@
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
- register_hotcpu_notifier(&blk_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+ "block/softirq:dead", NULL,
+ blk_softirq_cpu_dead);
return 0;
}
subsys_initcall(blk_softirq_init);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f1aba26..a3ea826 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -780,9 +780,11 @@
/*
* If previous slice expired, start a new one otherwise renew/extend
* existing slice to make sure it is at least throtl_slice interval
- * long since now.
+ * long since now. New slice is started only for empty throttle group.
+ * If there is queued bio, that means there should be an active
+ * slice and it should be extended instead.
*/
- if (throtl_slice_used(tg, rw))
+ if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
throtl_start_new_slice(tg, rw);
else {
if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 3699995..a832426 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -233,6 +233,8 @@
return blkcipher_walk_done(desc, walk, -EINVAL);
}
+ bsize = min(walk->walk_blocksize, n);
+
walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
BLKCIPHER_WALK_DIFF);
if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
@@ -245,7 +247,6 @@
}
}
- bsize = min(walk->walk_blocksize, n);
n = scatterwalk_clamp(&walk->in, n);
n = scatterwalk_clamp(&walk->out, n);
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index 1b01fe9..e3d889b 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -1,8 +1,8 @@
/*
* echainiv: Encrypted Chain IV Generator
*
- * This generator generates an IV based on a sequence number by xoring it
- * with a salt and then encrypting it with the same key as used to encrypt
+ * This generator generates an IV based on a sequence number by multiplying
+ * it with a salt and then encrypting it with the same key as used to encrypt
* the plain text. This algorithm requires that the block size be equal
* to the IV size. It is mainly useful for CBC.
*
@@ -24,81 +24,17 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
+#include <linux/slab.h>
#include <linux/string.h>
-#define MAX_IV_SIZE 16
-
-static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
-
-/* We don't care if we get preempted and read/write IVs from the next CPU. */
-static void echainiv_read_iv(u8 *dst, unsigned size)
-{
- u32 *a = (u32 *)dst;
- u32 __percpu *b = echainiv_iv;
-
- for (; size >= 4; size -= 4) {
- *a++ = this_cpu_read(*b);
- b++;
- }
-}
-
-static void echainiv_write_iv(const u8 *src, unsigned size)
-{
- const u32 *a = (const u32 *)src;
- u32 __percpu *b = echainiv_iv;
-
- for (; size >= 4; size -= 4) {
- this_cpu_write(*b, *a);
- a++;
- b++;
- }
-}
-
-static void echainiv_encrypt_complete2(struct aead_request *req, int err)
-{
- struct aead_request *subreq = aead_request_ctx(req);
- struct crypto_aead *geniv;
- unsigned int ivsize;
-
- if (err == -EINPROGRESS)
- return;
-
- if (err)
- goto out;
-
- geniv = crypto_aead_reqtfm(req);
- ivsize = crypto_aead_ivsize(geniv);
-
- echainiv_write_iv(subreq->iv, ivsize);
-
- if (req->iv != subreq->iv)
- memcpy(req->iv, subreq->iv, ivsize);
-
-out:
- if (req->iv != subreq->iv)
- kzfree(subreq->iv);
-}
-
-static void echainiv_encrypt_complete(struct crypto_async_request *base,
- int err)
-{
- struct aead_request *req = base->data;
-
- echainiv_encrypt_complete2(req, err);
- aead_request_complete(req, err);
-}
-
static int echainiv_encrypt(struct aead_request *req)
{
struct crypto_aead *geniv = crypto_aead_reqtfm(req);
struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
struct aead_request *subreq = aead_request_ctx(req);
- crypto_completion_t compl;
- void *data;
+ __be64 nseqno;
+ u64 seqno;
u8 *info;
unsigned int ivsize = crypto_aead_ivsize(geniv);
int err;
@@ -108,8 +44,6 @@
aead_request_set_tfm(subreq, ctx->child);
- compl = echainiv_encrypt_complete;
- data = req;
info = req->iv;
if (req->src != req->dst) {
@@ -127,29 +61,30 @@
return err;
}
- if (unlikely(!IS_ALIGNED((unsigned long)info,
- crypto_aead_alignmask(geniv) + 1))) {
- info = kmalloc(ivsize, req->base.flags &
- CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
- GFP_ATOMIC);
- if (!info)
- return -ENOMEM;
-
- memcpy(info, req->iv, ivsize);
- }
-
- aead_request_set_callback(subreq, req->base.flags, compl, data);
+ aead_request_set_callback(subreq, req->base.flags,
+ req->base.complete, req->base.data);
aead_request_set_crypt(subreq, req->dst, req->dst,
req->cryptlen, info);
aead_request_set_ad(subreq, req->assoclen);
- crypto_xor(info, ctx->salt, ivsize);
- scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
- echainiv_read_iv(info, ivsize);
+ memcpy(&nseqno, info + ivsize - 8, 8);
+ seqno = be64_to_cpu(nseqno);
+ memset(info, 0, ivsize);
- err = crypto_aead_encrypt(subreq);
- echainiv_encrypt_complete2(req, err);
- return err;
+ scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+
+ do {
+ u64 a;
+
+ memcpy(&a, ctx->salt + ivsize - 8, 8);
+
+ a |= 1;
+ a *= seqno;
+
+ memcpy(info + ivsize - 8, &a, 8);
+ } while ((ivsize -= 8));
+
+ return crypto_aead_encrypt(subreq);
}
static int echainiv_decrypt(struct aead_request *req)
@@ -196,8 +131,7 @@
alg = crypto_spawn_aead_alg(spawn);
err = -EINVAL;
- if (inst->alg.ivsize & (sizeof(u32) - 1) ||
- inst->alg.ivsize > MAX_IV_SIZE)
+ if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
goto free_inst;
inst->alg.encrypt = echainiv_encrypt;
@@ -206,7 +140,6 @@
inst->alg.init = aead_init_geniv;
inst->alg.exit = aead_exit_geniv;
- inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
inst->alg.base.cra_ctxsize += inst->alg.ivsize;
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 877019a..8baab43 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -298,41 +298,48 @@
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req);
+ unsigned int dst_len;
unsigned int pos;
-
- if (err == -EOVERFLOW)
- /* Decrypted value had no leading 0 byte */
- err = -EINVAL;
+ u8 *out_buf;
if (err)
goto done;
- if (req_ctx->child_req.dst_len != ctx->key_size - 1) {
- err = -EINVAL;
+ err = -EINVAL;
+ dst_len = req_ctx->child_req.dst_len;
+ if (dst_len < ctx->key_size - 1)
goto done;
+
+ out_buf = req_ctx->out_buf;
+ if (dst_len == ctx->key_size) {
+ if (out_buf[0] != 0x00)
+ /* Decrypted value had no leading 0 byte */
+ goto done;
+
+ dst_len--;
+ out_buf++;
}
- if (req_ctx->out_buf[0] != 0x02) {
- err = -EINVAL;
+ if (out_buf[0] != 0x02)
goto done;
- }
- for (pos = 1; pos < req_ctx->child_req.dst_len; pos++)
- if (req_ctx->out_buf[pos] == 0x00)
+
+ for (pos = 1; pos < dst_len; pos++)
+ if (out_buf[pos] == 0x00)
break;
- if (pos < 9 || pos == req_ctx->child_req.dst_len) {
- err = -EINVAL;
+ if (pos < 9 || pos == dst_len)
goto done;
- }
pos++;
- if (req->dst_len < req_ctx->child_req.dst_len - pos)
+ err = 0;
+
+ if (req->dst_len < dst_len - pos)
err = -EOVERFLOW;
- req->dst_len = req_ctx->child_req.dst_len - pos;
+ req->dst_len = dst_len - pos;
if (!err)
sg_copy_from_buffer(req->dst,
sg_nents_for_len(req->dst, req->dst_len),
- req_ctx->out_buf + pos, req->dst_len);
+ out_buf + pos, req->dst_len);
done:
kzfree(req_ctx->out_buf);
diff --git a/drivers/Makefile b/drivers/Makefile
index 53abb4a..f0afdfb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -29,6 +29,8 @@
# was used and do nothing if so
obj-$(CONFIG_PNP) += pnp/
obj-y += amba/
+
+obj-y += clk/
# Many drivers will want to use DMA so this has to be made available
# really early.
obj-$(CONFIG_DMADEVICES) += dma/
@@ -142,8 +144,6 @@
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
-#common clk code
-obj-y += clk/
obj-$(CONFIG_MAILBOX) += mailbox/
obj-$(CONFIG_HWSPINLOCK) += hwspinlock/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 445ce28..535e782 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -77,6 +77,9 @@
endif
+config ACPI_SPCR_TABLE
+ bool
+
config ACPI_SLEEP
bool
depends on SUSPEND || HIBERNATION
@@ -227,7 +230,6 @@
config ACPI_CPPC_LIB
bool
depends on ACPI_PROCESSOR
- depends on !ACPI_CPU_FREQ_PSS
select MAILBOX
select PCC
help
@@ -462,6 +464,9 @@
source "drivers/acpi/apei/Kconfig"
source "drivers/acpi/dptf/Kconfig"
+config ACPI_WATCHDOG
+ bool
+
config ACPI_EXTLOG
tristate "Extended Error Log support"
depends on X86_MCE && X86_LOCAL_APIC
@@ -521,4 +526,8 @@
userspace. The configurable ACPI groups will be visible under
/config/acpi, assuming configfs is mounted under /config.
+if ARM64
+source "drivers/acpi/arm64/Kconfig"
+endif
+
endif # ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 5ae9d85..9ed0878 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -56,6 +56,7 @@
acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
acpi-y += acpi_lpat.o
acpi-$(CONFIG_ACPI_GENERIC_GSI) += gsi.o
+acpi-$(CONFIG_ACPI_WATCHDOG) += acpi_watchdog.o
# These are (potentially) separate modules
@@ -81,6 +82,7 @@
obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
obj-$(CONFIG_ACPI_BGRT) += bgrt.o
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o
+obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o
obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o
# processor has its own "processor." module_param namespace
@@ -105,3 +107,5 @@
video-objs += acpi_video.o video_detect.o
obj-y += dptf/
+
+obj-$(CONFIG_ARM64) += arm64/
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 1daf9c4..d58fbf7 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -42,6 +42,7 @@
struct apd_device_desc {
unsigned int flags;
unsigned int fixed_clk_rate;
+ struct property_entry *properties;
int (*setup)(struct apd_private_data *pdata);
};
@@ -71,22 +72,35 @@
}
#ifdef CONFIG_X86_AMD_PLATFORM_DEVICE
-static struct apd_device_desc cz_i2c_desc = {
+static const struct apd_device_desc cz_i2c_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 133000000,
};
-static struct apd_device_desc cz_uart_desc = {
+static struct property_entry uart_properties[] = {
+ PROPERTY_ENTRY_U32("reg-io-width", 4),
+ PROPERTY_ENTRY_U32("reg-shift", 2),
+ PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+ { },
+};
+
+static const struct apd_device_desc cz_uart_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 48000000,
+ .properties = uart_properties,
};
#endif
#ifdef CONFIG_ARM64
-static struct apd_device_desc xgene_i2c_desc = {
+static const struct apd_device_desc xgene_i2c_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 100000000,
};
+
+static const struct apd_device_desc vulcan_spi_desc = {
+ .setup = acpi_apd_setup,
+ .fixed_clk_rate = 133000000,
+};
#endif
#else
@@ -125,6 +139,12 @@
goto err_out;
}
+ if (dev_desc->properties) {
+ ret = device_add_properties(&adev->dev, dev_desc->properties);
+ if (ret)
+ goto err_out;
+ }
+
adev->driver_data = pdata;
pdev = acpi_create_platform_device(adev);
if (!IS_ERR_OR_NULL(pdev))
@@ -149,6 +169,7 @@
#endif
#ifdef CONFIG_ARM64
{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
+ { "BRCM900D", APD_ADDR(vulcan_spi_desc) },
#endif
{ }
};
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 357a0b8..5520102 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -75,6 +75,7 @@
const char *clk_con_id;
unsigned int prv_offset;
size_t prv_size_override;
+ struct property_entry *properties;
void (*setup)(struct lpss_private_data *pdata);
};
@@ -163,11 +164,19 @@
.prv_offset = 0x800,
};
+static struct property_entry uart_properties[] = {
+ PROPERTY_ENTRY_U32("reg-io-width", 4),
+ PROPERTY_ENTRY_U32("reg-shift", 2),
+ PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+ { },
+};
+
static const struct lpss_device_desc lpt_uart_dev_desc = {
.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR,
.clk_con_id = "baudclk",
.prv_offset = 0x800,
.setup = lpss_uart_setup,
+ .properties = uart_properties,
};
static const struct lpss_device_desc lpt_sdio_dev_desc = {
@@ -189,6 +198,7 @@
.clk_con_id = "baudclk",
.prv_offset = 0x800,
.setup = lpss_uart_setup,
+ .properties = uart_properties,
};
static const struct lpss_device_desc bsw_uart_dev_desc = {
@@ -197,6 +207,7 @@
.clk_con_id = "baudclk",
.prv_offset = 0x800,
.setup = lpss_uart_setup,
+ .properties = uart_properties,
};
static const struct lpss_device_desc byt_spi_dev_desc = {
@@ -440,6 +451,12 @@
goto err_out;
}
+ if (dev_desc->properties) {
+ ret = device_add_properties(&adev->dev, dev_desc->properties);
+ if (ret)
+ goto err_out;
+ }
+
adev->driver_data = pdata;
pdev = acpi_create_platform_device(adev);
if (!IS_ERR_OR_NULL(pdev)) {
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 159f7f1..b200ae1 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
+#include <linux/pci.h>
#include <linux/platform_device.h>
#include "internal.h"
@@ -30,6 +31,22 @@
{"", 0},
};
+static void acpi_platform_fill_resource(struct acpi_device *adev,
+ const struct resource *src, struct resource *dest)
+{
+ struct device *parent;
+
+ *dest = *src;
+
+ /*
+ * If the device has parent we need to take its resources into
+ * account as well because this device might consume part of those.
+ */
+ parent = acpi_get_first_physical_node(adev->parent);
+ if (parent && dev_is_pci(parent))
+ dest->parent = pci_find_resource(to_pci_dev(parent), dest);
+}
+
/**
* acpi_create_platform_device - Create platform device for ACPI device node
* @adev: ACPI device node to create a platform device for.
@@ -70,7 +87,8 @@
}
count = 0;
list_for_each_entry(rentry, &resource_list, node)
- resources[count++] = *rentry->res;
+ acpi_platform_fill_resource(adev, rentry->res,
+ &resources[count++]);
acpi_dev_free_resource_list(&resource_list);
}
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index c7ba948..3de3b6b 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,6 +182,11 @@
void __weak arch_unregister_cpu(int cpu) {}
+int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+{
+ return -ENODEV;
+}
+
static int acpi_processor_hotadd_init(struct acpi_processor *pr)
{
unsigned long long sta;
@@ -300,8 +305,11 @@
* Extra Processor objects may be enumerated on MP systems with
* less than the max # of CPUs. They should be ignored _iff
* they are physically not present.
+ *
+ * NOTE: Even if the processor has a cpuid, it may not be present
+ * because cpuid <-> apicid mapping is persistent now.
*/
- if (invalid_logical_cpuid(pr->id)) {
+ if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) {
int ret = acpi_processor_hotadd_init(pr);
if (ret)
return ret;
@@ -573,8 +581,102 @@
.attach = acpi_processor_container_attach,
};
+/* The number of the unique processor IDs */
+static int nr_unique_ids __initdata;
+
+/* The number of the duplicate processor IDs */
+static int nr_duplicate_ids __initdata;
+
+/* Used to store the unique processor IDs */
+static int unique_processor_ids[] __initdata = {
+ [0 ... NR_CPUS - 1] = -1,
+};
+
+/* Used to store the duplicate processor IDs */
+static int duplicate_processor_ids[] __initdata = {
+ [0 ... NR_CPUS - 1] = -1,
+};
+
+static void __init processor_validated_ids_update(int proc_id)
+{
+ int i;
+
+ if (nr_unique_ids == NR_CPUS||nr_duplicate_ids == NR_CPUS)
+ return;
+
+ /*
+ * Firstly, compare the proc_id with duplicate IDs, if the proc_id is
+ * already in the IDs, do nothing.
+ */
+ for (i = 0; i < nr_duplicate_ids; i++) {
+ if (duplicate_processor_ids[i] == proc_id)
+ return;
+ }
+
+ /*
+ * Secondly, compare the proc_id with unique IDs, if the proc_id is in
+ * the IDs, put it in the duplicate IDs.
+ */
+ for (i = 0; i < nr_unique_ids; i++) {
+ if (unique_processor_ids[i] == proc_id) {
+ duplicate_processor_ids[nr_duplicate_ids] = proc_id;
+ nr_duplicate_ids++;
+ return;
+ }
+ }
+
+ /*
+ * Lastly, the proc_id is a unique ID, put it in the unique IDs.
+ */
+ unique_processor_ids[nr_unique_ids] = proc_id;
+ nr_unique_ids++;
+}
+
+static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
+ u32 lvl,
+ void *context,
+ void **rv)
+{
+ acpi_status status;
+ union acpi_object object = { 0 };
+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+ status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ acpi_handle_info(handle, "Not get the processor object\n");
+ else
+ processor_validated_ids_update(object.processor.proc_id);
+
+ return AE_OK;
+}
+
+static void __init acpi_processor_check_duplicates(void)
+{
+ /* Search all processor nodes in ACPI namespace */
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ acpi_processor_ids_walk,
+ NULL, NULL, NULL);
+}
+
+bool __init acpi_processor_validate_proc_id(int proc_id)
+{
+ int i;
+
+ /*
+ * compare the proc_id with duplicate IDs, if the proc_id is already
+ * in the duplicate IDs, return true, otherwise, return false.
+ */
+ for (i = 0; i < nr_duplicate_ids; i++) {
+ if (duplicate_processor_ids[i] == proc_id)
+ return true;
+ }
+ return false;
+}
+
void __init acpi_processor_init(void)
{
+ acpi_processor_check_duplicates();
acpi_scan_add_handler_with_hotplug(&processor_handler, "processor");
acpi_scan_add_handler(&processor_container_handler);
}
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
new file mode 100644
index 0000000..13caebd
--- /dev/null
+++ b/drivers/acpi/acpi_watchdog.c
@@ -0,0 +1,123 @@
+/*
+ * ACPI watchdog table parsing support.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: watchdog: " fmt
+
+#include <linux/acpi.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+
+#include "internal.h"
+
+/**
+ * Returns true if this system should prefer ACPI based watchdog instead of
+ * the native one (which are typically the same hardware).
+ */
+bool acpi_has_watchdog(void)
+{
+ struct acpi_table_header hdr;
+
+ if (acpi_disabled)
+ return false;
+
+ return ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_WDAT, 0, &hdr));
+}
+EXPORT_SYMBOL_GPL(acpi_has_watchdog);
+
+void __init acpi_watchdog_init(void)
+{
+ const struct acpi_wdat_entry *entries;
+ const struct acpi_table_wdat *wdat;
+ struct list_head resource_list;
+ struct resource_entry *rentry;
+ struct platform_device *pdev;
+ struct resource *resources;
+ size_t nresources = 0;
+ acpi_status status;
+ int i;
+
+ status = acpi_get_table(ACPI_SIG_WDAT, 0,
+ (struct acpi_table_header **)&wdat);
+ if (ACPI_FAILURE(status)) {
+ /* It is fine if there is no WDAT */
+ return;
+ }
+
+ /* Watchdog disabled by BIOS */
+ if (!(wdat->flags & ACPI_WDAT_ENABLED))
+ return;
+
+ /* Skip legacy PCI WDT devices */
+ if (wdat->pci_segment != 0xff || wdat->pci_bus != 0xff ||
+ wdat->pci_device != 0xff || wdat->pci_function != 0xff)
+ return;
+
+ INIT_LIST_HEAD(&resource_list);
+
+ entries = (struct acpi_wdat_entry *)(wdat + 1);
+ for (i = 0; i < wdat->entries; i++) {
+ const struct acpi_generic_address *gas;
+ struct resource_entry *rentry;
+ struct resource res;
+ bool found;
+
+ gas = &entries[i].register_region;
+
+ res.start = gas->address;
+ if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ res.flags = IORESOURCE_MEM;
+ res.end = res.start + ALIGN(gas->access_width, 4);
+ } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+ res.flags = IORESOURCE_IO;
+ res.end = res.start + gas->access_width;
+ } else {
+ pr_warn("Unsupported address space: %u\n",
+ gas->space_id);
+ goto fail_free_resource_list;
+ }
+
+ found = false;
+ resource_list_for_each_entry(rentry, &resource_list) {
+ if (resource_contains(rentry->res, &res)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ rentry = resource_list_create_entry(NULL, 0);
+ if (!rentry)
+ goto fail_free_resource_list;
+
+ *rentry->res = res;
+ resource_list_add_tail(rentry, &resource_list);
+ nresources++;
+ }
+ }
+
+ resources = kcalloc(nresources, sizeof(*resources), GFP_KERNEL);
+ if (!resources)
+ goto fail_free_resource_list;
+
+ i = 0;
+ resource_list_for_each_entry(rentry, &resource_list)
+ resources[i++] = *rentry->res;
+
+ pdev = platform_device_register_simple("wdat_wdt", PLATFORM_DEVID_NONE,
+ resources, nresources);
+ if (IS_ERR(pdev))
+ pr_err("Failed to create platform device\n");
+
+ kfree(resources);
+
+fail_free_resource_list:
+ resource_list_free(&resource_list);
+}
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 227bb7b..32d93ed 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -175,6 +175,7 @@
utresrc.o \
utstate.o \
utstring.o \
+ utstrtoul64.o \
utxface.o \
utxfinit.o \
utxferror.o \
diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index ca2c060..0bd6307 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -44,7 +44,9 @@
#ifndef _ACAPPS
#define _ACAPPS
-#include <stdio.h>
+#ifdef ACPI_USE_STANDARD_HEADERS
+#include <sys/stat.h>
+#endif /* ACPI_USE_STANDARD_HEADERS */
/* Common info for tool signons */
@@ -81,13 +83,13 @@
/* Macros for usage messages */
#define ACPI_USAGE_HEADER(usage) \
- acpi_os_printf ("Usage: %s\nOptions:\n", usage);
+ printf ("Usage: %s\nOptions:\n", usage);
#define ACPI_USAGE_TEXT(description) \
- acpi_os_printf (description);
+ printf (description);
#define ACPI_OPTION(name, description) \
- acpi_os_printf (" %-20s%s\n", name, description);
+ printf (" %-20s%s\n", name, description);
/* Check for unexpected exceptions */
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index f6404ea..94737f8 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -155,7 +155,7 @@
void acpi_db_disassemble_aml(char *statements, union acpi_parse_object *op);
-void acpi_db_batch_execute(char *count_arg);
+void acpi_db_evaluate_predefined_names(void);
/*
* dbnames - namespace commands
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 77af91c..92fa47c 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -86,6 +86,9 @@
acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info);
acpi_status
+acpi_ev_mask_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 is_masked);
+
+acpi_status
acpi_ev_add_gpe_reference(struct acpi_gpe_event_info *gpe_event_info);
acpi_status
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index fded776..750fa82 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -317,6 +317,7 @@
ACPI_INIT_GLOBAL(u8, acpi_gbl_cstyle_disassembly, TRUE);
ACPI_INIT_GLOBAL(u8, acpi_gbl_force_aml_disassembly, FALSE);
ACPI_INIT_GLOBAL(u8, acpi_gbl_dm_opt_verbose, TRUE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_dm_emit_external_opcodes, FALSE);
ACPI_GLOBAL(u8, acpi_gbl_dm_opt_disasm);
ACPI_GLOBAL(u8, acpi_gbl_dm_opt_listing);
@@ -382,6 +383,7 @@
ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_debug_file, NULL);
ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_output_file, NULL);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_timeout, FALSE);
/* Print buffer */
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 13331d7..dff1207 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -484,6 +484,7 @@
u8 flags; /* Misc info about this GPE */
u8 gpe_number; /* This GPE */
u8 runtime_count; /* References to a run GPE */
+ u8 disable_for_dispatch; /* Masked during dispatching */
};
/* Information about a GPE register pair, one per each status/enable pair in an array */
@@ -494,6 +495,7 @@
u16 base_gpe_number; /* Base GPE number for this register */
u8 enable_for_wake; /* GPEs to keep enabled when sleeping */
u8 enable_for_run; /* GPEs to keep enabled when running */
+ u8 mask_for_run; /* GPEs to keep masked when running */
u8 enable_mask; /* Current mask of enabled GPEs */
};
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index f33a4ba..bb7fca1 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -130,6 +130,9 @@
acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node);
acpi_status
+acpi_ns_execute_table(u32 table_index, struct acpi_namespace_node *start_node);
+
+acpi_status
acpi_ns_one_complete_parse(u32 pass_number,
u32 table_index,
struct acpi_namespace_node *start_node);
@@ -296,6 +299,11 @@
acpi_ns_pattern_match(struct acpi_namespace_node *obj_node, char *search_for);
acpi_status
+acpi_ns_get_node_unlocked(struct acpi_namespace_node *prefix_node,
+ const char *external_pathname,
+ u32 flags, struct acpi_namespace_node **out_node);
+
+acpi_status
acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
const char *external_pathname,
u32 flags, struct acpi_namespace_node **out_node);
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index fc30577..939d411 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -78,6 +78,8 @@
*/
acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info);
+acpi_status acpi_ps_execute_table(struct acpi_evaluate_info *info);
+
/*
* psargs - Parse AML opcode arguments
*/
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index cd5a135..e85953b 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -123,6 +123,14 @@
void acpi_tb_uninstall_table(struct acpi_table_desc *table_desc);
+acpi_status
+acpi_tb_load_table(u32 table_index, struct acpi_namespace_node *parent_node);
+
+acpi_status
+acpi_tb_install_and_load_table(struct acpi_table_header *table,
+ acpi_physical_address address,
+ u8 flags, u8 override, u32 *table_index);
+
void acpi_tb_terminate(void);
acpi_status acpi_tb_delete_namespace_by_owner(u32 table_index);
@@ -155,10 +163,6 @@
acpi_tb_install_table_with_override(struct acpi_table_desc *new_table_desc,
u8 override, u32 *table_index);
-acpi_status
-acpi_tb_install_fixed_table(acpi_physical_address address,
- char *signature, u32 *table_index);
-
acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address);
/*
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index a7dbb2b..0a1b53c 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -114,13 +114,25 @@
/*
* Common error message prefixes
*/
+#ifndef ACPI_MSG_ERROR
#define ACPI_MSG_ERROR "ACPI Error: "
+#endif
+#ifndef ACPI_MSG_EXCEPTION
#define ACPI_MSG_EXCEPTION "ACPI Exception: "
+#endif
+#ifndef ACPI_MSG_WARNING
#define ACPI_MSG_WARNING "ACPI Warning: "
+#endif
+#ifndef ACPI_MSG_INFO
#define ACPI_MSG_INFO "ACPI: "
+#endif
+#ifndef ACPI_MSG_BIOS_ERROR
#define ACPI_MSG_BIOS_ERROR "ACPI BIOS Error (bug): "
+#endif
+#ifndef ACPI_MSG_BIOS_WARNING
#define ACPI_MSG_BIOS_WARNING "ACPI BIOS Warning (bug): "
+#endif
/*
* Common message suffix
@@ -184,14 +196,15 @@
int acpi_ut_stricmp(char *string1, char *string2);
-acpi_status
-acpi_ut_strtoul64(char *string,
- u32 base, u32 max_integer_byte_width, u64 *ret_integer);
+acpi_status acpi_ut_strtoul64(char *string, u32 flags, u64 *ret_integer);
-/* Values for max_integer_byte_width above */
-
-#define ACPI_MAX32_BYTE_WIDTH 4
-#define ACPI_MAX64_BYTE_WIDTH 8
+/*
+ * Values for Flags above
+ * Note: LIMIT values correspond to acpi_gbl_integer_byte_width values (4/8)
+ */
+#define ACPI_STRTOUL_32BIT 0x04 /* 4 bytes */
+#define ACPI_STRTOUL_64BIT 0x08 /* 8 bytes */
+#define ACPI_STRTOUL_BASE16 0x10 /* Default: Base10/16 */
/*
* utglobal - Global data structures and procedures
@@ -221,6 +234,8 @@
char acpi_ut_hex_to_ascii_char(u64 integer, u32 position);
+acpi_status acpi_ut_ascii_to_hex_byte(char *two_ascii_chars, u8 *return_byte);
+
u8 acpi_ut_ascii_char_to_hex(int hex_char);
u8 acpi_ut_valid_object_type(acpi_object_type type);
@@ -318,6 +333,11 @@
const char *module_name, u32 component_id, u8 *ptr);
void
+acpi_ut_str_exit(u32 line_number,
+ const char *function_name,
+ const char *module_name, u32 component_id, const char *string);
+
+void
acpi_ut_debug_dump_buffer(u8 *buffer, u32 count, u32 display, u32 component_id);
void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 offset);
@@ -707,25 +727,6 @@
const char *acpi_ah_match_uuid(u8 *data);
/*
- * utprint - printf/vprintf output functions
- */
-const char *acpi_ut_scan_number(const char *string, u64 *number_ptr);
-
-const char *acpi_ut_print_number(char *string, u64 number);
-
-int
-acpi_ut_vsnprintf(char *string,
- acpi_size size, const char *format, va_list args);
-
-int acpi_ut_snprintf(char *string, acpi_size size, const char *format, ...);
-
-#ifdef ACPI_APPLICATION
-int acpi_ut_file_vprintf(ACPI_FILE file, const char *format, va_list args);
-
-int acpi_ut_file_printf(ACPI_FILE file, const char *format, ...);
-#endif
-
-/*
* utuuid -- UUID support functions
*/
#if (defined ACPI_ASL_COMPILER || defined ACPI_EXEC_APP || defined ACPI_HELP_APP)
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 7cd07b2..147ce88 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -277,9 +277,10 @@
default:
object->type = ACPI_TYPE_INTEGER;
- status =
- acpi_ut_strtoul64(string, 16, acpi_gbl_integer_byte_width,
- &object->integer.value);
+ status = acpi_ut_strtoul64(string,
+ (acpi_gbl_integer_byte_width |
+ ACPI_STRTOUL_BASE16),
+ &object->integer.value);
break;
}
diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c
index 12df291..fe3da7c 100644
--- a/drivers/acpi/acpica/dbexec.c
+++ b/drivers/acpi/acpica/dbexec.c
@@ -392,43 +392,49 @@
acpi_db_execution_walk, NULL, NULL,
NULL);
return;
- } else {
- name_string = ACPI_ALLOCATE(strlen(name) + 1);
- if (!name_string) {
- return;
- }
-
- memset(&acpi_gbl_db_method_info, 0,
- sizeof(struct acpi_db_method_info));
-
- strcpy(name_string, name);
- acpi_ut_strupr(name_string);
- acpi_gbl_db_method_info.name = name_string;
- acpi_gbl_db_method_info.args = args;
- acpi_gbl_db_method_info.types = types;
- acpi_gbl_db_method_info.flags = flags;
-
- return_obj.pointer = NULL;
- return_obj.length = ACPI_ALLOCATE_BUFFER;
-
- status = acpi_db_execute_setup(&acpi_gbl_db_method_info);
- if (ACPI_FAILURE(status)) {
- ACPI_FREE(name_string);
- return;
- }
-
- /* Get the NS node, determines existence also */
-
- status = acpi_get_handle(NULL, acpi_gbl_db_method_info.pathname,
- &acpi_gbl_db_method_info.method);
- if (ACPI_SUCCESS(status)) {
- status =
- acpi_db_execute_method(&acpi_gbl_db_method_info,
- &return_obj);
- }
- ACPI_FREE(name_string);
}
+ name_string = ACPI_ALLOCATE(strlen(name) + 1);
+ if (!name_string) {
+ return;
+ }
+
+ memset(&acpi_gbl_db_method_info, 0, sizeof(struct acpi_db_method_info));
+ strcpy(name_string, name);
+ acpi_ut_strupr(name_string);
+
+ /* Subcommand to Execute all predefined names in the namespace */
+
+ if (!strncmp(name_string, "PREDEF", 6)) {
+ acpi_db_evaluate_predefined_names();
+ ACPI_FREE(name_string);
+ return;
+ }
+
+ acpi_gbl_db_method_info.name = name_string;
+ acpi_gbl_db_method_info.args = args;
+ acpi_gbl_db_method_info.types = types;
+ acpi_gbl_db_method_info.flags = flags;
+
+ return_obj.pointer = NULL;
+ return_obj.length = ACPI_ALLOCATE_BUFFER;
+
+ status = acpi_db_execute_setup(&acpi_gbl_db_method_info);
+ if (ACPI_FAILURE(status)) {
+ ACPI_FREE(name_string);
+ return;
+ }
+
+ /* Get the NS node, determines existence also */
+
+ status = acpi_get_handle(NULL, acpi_gbl_db_method_info.pathname,
+ &acpi_gbl_db_method_info.method);
+ if (ACPI_SUCCESS(status)) {
+ status = acpi_db_execute_method(&acpi_gbl_db_method_info,
+ &return_obj);
+ }
+ ACPI_FREE(name_string);
+
/*
* Allow any handlers in separate threads to complete.
* (Such as Notify handlers invoked from AML executed above).
diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c
index 4832879..6f05b8c 100644
--- a/drivers/acpi/acpica/dbfileio.c
+++ b/drivers/acpi/acpica/dbfileio.c
@@ -46,14 +46,12 @@
#include "accommon.h"
#include "acdebug.h"
#include "actables.h"
-#include <stdio.h>
-#ifdef ACPI_APPLICATION
-#include "acapps.h"
-#endif
#define _COMPONENT ACPI_CA_DEBUGGER
ACPI_MODULE_NAME("dbfileio")
+#ifdef ACPI_APPLICATION
+#include "acapps.h"
#ifdef ACPI_DEBUGGER
/*******************************************************************************
*
@@ -69,8 +67,6 @@
void acpi_db_close_debug_file(void)
{
-#ifdef ACPI_APPLICATION
-
if (acpi_gbl_debug_file) {
fclose(acpi_gbl_debug_file);
acpi_gbl_debug_file = NULL;
@@ -78,7 +74,6 @@
acpi_os_printf("Debug output file %s closed\n",
acpi_gbl_db_debug_filename);
}
-#endif
}
/*******************************************************************************
@@ -96,8 +91,6 @@
void acpi_db_open_debug_file(char *name)
{
-#ifdef ACPI_APPLICATION
-
acpi_db_close_debug_file();
acpi_gbl_debug_file = fopen(name, "w+");
if (!acpi_gbl_debug_file) {
@@ -109,8 +102,6 @@
strncpy(acpi_gbl_db_debug_filename, name,
sizeof(acpi_gbl_db_debug_filename));
acpi_gbl_db_output_to_file = TRUE;
-
-#endif
}
#endif
@@ -152,12 +143,13 @@
return (status);
}
- fprintf(stderr,
- "Acpi table [%4.4s] successfully installed and loaded\n",
- table->signature);
+ acpi_os_printf
+ ("Acpi table [%4.4s] successfully installed and loaded\n",
+ table->signature);
table_list_head = table_list_head->next;
}
return (AE_OK);
}
+#endif
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 7cd5d2e..068214f 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -286,6 +286,8 @@
{1, " \"Ascii String\"", "String method argument\n"},
{1, " (Hex Byte List)", "Buffer method argument\n"},
{1, " [Package Element List]", "Package method argument\n"},
+ {5, " Execute predefined",
+ "Execute all predefined (public) methods\n"},
{1, " Go", "Allow method to run to completion\n"},
{1, " Information", "Display info about the current method\n"},
{1, " Into", "Step into (not over) a method call\n"},
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index f17a86f..314b94c 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -52,6 +52,11 @@
#define _COMPONENT ACPI_CA_DEBUGGER
ACPI_MODULE_NAME("dbmethod")
+/* Local prototypes */
+static acpi_status
+acpi_db_walk_for_execute(acpi_handle obj_handle,
+ u32 nesting_level, void *context, void **return_value);
+
/*******************************************************************************
*
* FUNCTION: acpi_db_set_method_breakpoint
@@ -66,6 +71,7 @@
* AML offset
*
******************************************************************************/
+
void
acpi_db_set_method_breakpoint(char *location,
struct acpi_walk_state *walk_state,
@@ -367,3 +373,129 @@
acpi_ut_release_owner_id(&obj_desc->method.owner_id);
return (AE_OK);
}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_db_walk_for_execute
+ *
+ * PARAMETERS: Callback from walk_namespace
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Batch execution module. Currently only executes predefined
+ * ACPI names.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_db_walk_for_execute(acpi_handle obj_handle,
+ u32 nesting_level, void *context, void **return_value)
+{
+ struct acpi_namespace_node *node =
+ (struct acpi_namespace_node *)obj_handle;
+ struct acpi_db_execute_walk *info =
+ (struct acpi_db_execute_walk *)context;
+ struct acpi_buffer return_obj;
+ acpi_status status;
+ char *pathname;
+ u32 i;
+ struct acpi_device_info *obj_info;
+ struct acpi_object_list param_objects;
+ union acpi_object params[ACPI_METHOD_NUM_ARGS];
+ const union acpi_predefined_info *predefined;
+
+ predefined = acpi_ut_match_predefined_method(node->name.ascii);
+ if (!predefined) {
+ return (AE_OK);
+ }
+
+ if (node->type == ACPI_TYPE_LOCAL_SCOPE) {
+ return (AE_OK);
+ }
+
+ pathname = acpi_ns_get_external_pathname(node);
+ if (!pathname) {
+ return (AE_OK);
+ }
+
+ /* Get the object info for number of method parameters */
+
+ status = acpi_get_object_info(obj_handle, &obj_info);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ param_objects.pointer = NULL;
+ param_objects.count = 0;
+
+ if (obj_info->type == ACPI_TYPE_METHOD) {
+
+ /* Setup default parameters */
+
+ for (i = 0; i < obj_info->param_count; i++) {
+ params[i].type = ACPI_TYPE_INTEGER;
+ params[i].integer.value = 1;
+ }
+
+ param_objects.pointer = params;
+ param_objects.count = obj_info->param_count;
+ }
+
+ ACPI_FREE(obj_info);
+ return_obj.pointer = NULL;
+ return_obj.length = ACPI_ALLOCATE_BUFFER;
+
+ /* Do the actual method execution */
+
+ acpi_gbl_method_executing = TRUE;
+
+ status = acpi_evaluate_object(node, NULL, ¶m_objects, &return_obj);
+
+ acpi_os_printf("%-32s returned %s\n", pathname,
+ acpi_format_exception(status));
+ acpi_gbl_method_executing = FALSE;
+ ACPI_FREE(pathname);
+
+ /* Ignore status from method execution */
+
+ status = AE_OK;
+
+ /* Update count, check if we have executed enough methods */
+
+ info->count++;
+ if (info->count >= info->max_count) {
+ status = AE_CTRL_TERMINATE;
+ }
+
+ return (status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_db_evaluate_predefined_names
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Namespace batch execution. Execute predefined names in the
+ * namespace, up to the max count, if specified.
+ *
+ ******************************************************************************/
+
+void acpi_db_evaluate_predefined_names(void)
+{
+ struct acpi_db_execute_walk info;
+
+ info.count = 0;
+ info.max_count = ACPI_UINT32_MAX;
+
+ /* Search all nodes in namespace */
+
+ (void)acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, acpi_db_walk_for_execute,
+ NULL, (void *)&info, NULL);
+
+ acpi_os_printf("Evaluated %u predefined names in the namespace\n",
+ info.count);
+}
diff --git a/drivers/acpi/acpica/dbobject.c b/drivers/acpi/acpica/dbobject.c
index 1d59e8b..08eaaf3 100644
--- a/drivers/acpi/acpica/dbobject.c
+++ b/drivers/acpi/acpica/dbobject.c
@@ -142,11 +142,11 @@
case ACPI_TYPE_STRING:
- acpi_os_printf("(%u) \"%.24s",
+ acpi_os_printf("(%u) \"%.60s",
obj_desc->string.length,
obj_desc->string.pointer);
- if (obj_desc->string.length > 24) {
+ if (obj_desc->string.length > 60) {
acpi_os_printf("...");
} else {
acpi_os_printf("\"");
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 47c7b52..32e9ddc 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -99,11 +99,14 @@
"Method auto-serialization parse [%4.4s] %p\n",
acpi_ut_get_node_name(node), node));
+ acpi_ex_enter_interpreter();
+
/* Create/Init a root op for the method parse tree */
op = acpi_ps_alloc_op(AML_METHOD_OP, obj_desc->method.aml_start);
if (!op) {
- return_ACPI_STATUS(AE_NO_MEMORY);
+ status = AE_NO_MEMORY;
+ goto unlock;
}
acpi_ps_set_name(op, node->name.integer);
@@ -115,7 +118,8 @@
acpi_ds_create_walk_state(node->owner_id, NULL, NULL, NULL);
if (!walk_state) {
acpi_ps_free_op(op);
- return_ACPI_STATUS(AE_NO_MEMORY);
+ status = AE_NO_MEMORY;
+ goto unlock;
}
status = acpi_ds_init_aml_walk(walk_state, op, node,
@@ -134,6 +138,8 @@
status = acpi_ps_parse_aml(walk_state);
acpi_ps_delete_parse_tree(op);
+unlock:
+ acpi_ex_exit_interpreter();
return_ACPI_STATUS(status);
}
@@ -757,8 +763,10 @@
/* Delete any direct children of (created by) this method */
+ (void)acpi_ex_exit_interpreter();
acpi_ns_delete_namespace_subtree(walk_state->
method_node);
+ (void)acpi_ex_enter_interpreter();
/*
* Delete any objects that were created by this method
@@ -769,9 +777,11 @@
*/
if (method_desc->method.
info_flags & ACPI_METHOD_MODIFIED_NAMESPACE) {
+ (void)acpi_ex_exit_interpreter();
acpi_ns_delete_namespace_by_owner(method_desc->
method.
owner_id);
+ (void)acpi_ex_enter_interpreter();
method_desc->method.info_flags &=
~ACPI_METHOD_MODIFIED_NAMESPACE;
}
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index f393de9..7d8ef52 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -565,15 +565,14 @@
status = AE_OK;
} else if (parent_op->common.aml_opcode ==
AML_EXTERNAL_OP) {
-
- /* TBD: May only be temporary */
-
- obj_desc =
- acpi_ut_create_string_object((acpi_size)name_length);
-
- strncpy(obj_desc->string.pointer,
- name_string, name_length);
- status = AE_OK;
+ /*
+ * This opcode should never appear here. It is used only
+ * by AML disassemblers and is surrounded by an If(0)
+ * by the ASL compiler.
+ *
+ * Therefore, if we see it here, it is a serious error.
+ */
+ status = AE_AML_BAD_OPCODE;
} else {
/*
* We just plain didn't find it -- which is a
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 402ecc5..438597c 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -133,7 +133,8 @@
* Result of predicate evaluation must be an Integer
* object. Implicitly convert the argument if necessary.
*/
- status = acpi_ex_convert_to_integer(obj_desc, &local_obj_desc, 16);
+ status = acpi_ex_convert_to_integer(obj_desc, &local_obj_desc,
+ ACPI_STRTOUL_BASE16);
if (ACPI_FAILURE(status)) {
goto cleanup;
}
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 762db3f..028b22a 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -605,16 +605,13 @@
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
-
- acpi_ex_exit_interpreter();
}
+ acpi_ex_exit_interpreter();
status =
acpi_ev_initialize_region
(acpi_ns_get_attached_object(node), FALSE);
- if (walk_state->method_node) {
- acpi_ex_enter_interpreter();
- }
+ acpi_ex_enter_interpreter();
if (ACPI_FAILURE(status)) {
/*
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 4b4949c..bdb10be 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -130,6 +130,60 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_ev_mask_gpe
+ *
+ * PARAMETERS: gpe_event_info - GPE to be blocked/unblocked
+ * is_masked - Whether the GPE is masked or not
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Unconditionally mask/unmask a GPE during runtime.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ev_mask_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 is_masked)
+{
+ struct acpi_gpe_register_info *gpe_register_info;
+ u32 register_bit;
+
+ ACPI_FUNCTION_TRACE(ev_mask_gpe);
+
+ gpe_register_info = gpe_event_info->register_info;
+ if (!gpe_register_info) {
+ return_ACPI_STATUS(AE_NOT_EXIST);
+ }
+
+ register_bit = acpi_hw_get_gpe_register_bit(gpe_event_info);
+
+ /* Perform the action */
+
+ if (is_masked) {
+ if (register_bit & gpe_register_info->mask_for_run) {
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+ ACPI_SET_BIT(gpe_register_info->mask_for_run, (u8)register_bit);
+ } else {
+ if (!(register_bit & gpe_register_info->mask_for_run)) {
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ ACPI_CLEAR_BIT(gpe_register_info->mask_for_run,
+ (u8)register_bit);
+ if (gpe_event_info->runtime_count
+ && !gpe_event_info->disable_for_dispatch) {
+ (void)acpi_hw_low_set_gpe(gpe_event_info,
+ ACPI_GPE_ENABLE);
+ }
+ }
+
+ return_ACPI_STATUS(AE_OK);
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ev_add_gpe_reference
*
* PARAMETERS: gpe_event_info - Add a reference to this GPE
@@ -674,6 +728,7 @@
* in the event_info.
*/
(void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_CONDITIONAL_ENABLE);
+ gpe_event_info->disable_for_dispatch = FALSE;
return (AE_OK);
}
@@ -737,6 +792,8 @@
}
}
+ gpe_event_info->disable_for_dispatch = TRUE;
+
/*
* Dispatch the GPE to either an installed handler or the control
* method associated with this GPE (_Lxx or _Exx). If a handler
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 7dc7547..16ce483 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -323,7 +323,9 @@
struct acpi_gpe_walk_info *walk_info =
ACPI_CAST_PTR(struct acpi_gpe_walk_info, context);
struct acpi_gpe_event_info *gpe_event_info;
+ acpi_status status;
u32 gpe_number;
+ u8 temp_gpe_number;
char name[ACPI_NAME_SIZE + 1];
u8 type;
@@ -377,8 +379,8 @@
/* 4) The last two characters of the name are the hex GPE Number */
- gpe_number = strtoul(&name[2], NULL, 16);
- if (gpe_number == ACPI_UINT32_MAX) {
+ status = acpi_ut_ascii_to_hex_byte(&name[2], &temp_gpe_number);
+ if (ACPI_FAILURE(status)) {
/* Conversion failed; invalid method, just ignore it */
@@ -390,6 +392,7 @@
/* Ensure that we have a valid GPE number for this GPE block */
+ gpe_number = (u32)temp_gpe_number;
gpe_event_info =
acpi_ev_low_get_gpe_info(gpe_number, walk_info->gpe_block);
if (!gpe_event_info) {
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index b6ea9c0..3843f1f 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -553,7 +553,8 @@
*
* See acpi_ns_exec_module_code
*/
- if (obj_desc->method.
+ if (!acpi_gbl_parse_table_as_term_list &&
+ obj_desc->method.
info_flags & ACPI_METHOD_MODULE_LEVEL) {
handler_obj =
obj_desc->method.dispatch.handler;
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 17cfef7..d7a3b27 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -235,11 +235,13 @@
case ACPI_GPE_ENABLE:
status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE);
+ gpe_event_info->disable_for_dispatch = FALSE;
break;
case ACPI_GPE_DISABLE:
status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+ gpe_event_info->disable_for_dispatch = TRUE;
break;
default:
@@ -257,6 +259,47 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_mask_gpe
+ *
+ * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
+ * gpe_number - GPE level within the GPE block
+ * is_masked - Whether the GPE is masked or not
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Unconditionally mask/unmask the an individual GPE, ex., to
+ * prevent a GPE flooding.
+ *
+ ******************************************************************************/
+acpi_status acpi_mask_gpe(acpi_handle gpe_device, u32 gpe_number, u8 is_masked)
+{
+ struct acpi_gpe_event_info *gpe_event_info;
+ acpi_status status;
+ acpi_cpu_flags flags;
+
+ ACPI_FUNCTION_TRACE(acpi_mask_gpe);
+
+ flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+
+ /* Ensure that we have a valid GPE number */
+
+ gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
+ if (!gpe_event_info) {
+ status = AE_BAD_PARAMETER;
+ goto unlock_and_exit;
+ }
+
+ status = acpi_ev_mask_gpe(gpe_event_info, is_masked);
+
+unlock_and_exit:
+ acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+ return_ACPI_STATUS(status);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_mask_gpe)
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_mark_gpe_for_wake
*
* PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
index 2423fe0..5429c2a 100644
--- a/drivers/acpi/acpica/exconcat.c
+++ b/drivers/acpi/acpica/exconcat.c
@@ -156,7 +156,7 @@
status =
acpi_ex_convert_to_integer(local_operand1, &temp_operand1,
- 16);
+ ACPI_STRTOUL_BASE16);
break;
case ACPI_TYPE_BUFFER:
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index a1d177d..718428b 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -55,9 +55,7 @@
/* Local prototypes */
static acpi_status
-acpi_ex_add_table(u32 table_index,
- struct acpi_namespace_node *parent_node,
- union acpi_operand_object **ddb_handle);
+acpi_ex_add_table(u32 table_index, union acpi_operand_object **ddb_handle);
static acpi_status
acpi_ex_region_read(union acpi_operand_object *obj_desc,
@@ -79,13 +77,9 @@
******************************************************************************/
static acpi_status
-acpi_ex_add_table(u32 table_index,
- struct acpi_namespace_node *parent_node,
- union acpi_operand_object **ddb_handle)
+acpi_ex_add_table(u32 table_index, union acpi_operand_object **ddb_handle)
{
union acpi_operand_object *obj_desc;
- acpi_status status;
- acpi_owner_id owner_id;
ACPI_FUNCTION_TRACE(ex_add_table);
@@ -100,39 +94,8 @@
obj_desc->common.flags |= AOPOBJ_DATA_VALID;
obj_desc->reference.class = ACPI_REFCLASS_TABLE;
- *ddb_handle = obj_desc;
-
- /* Install the new table into the local data structures */
-
obj_desc->reference.value = table_index;
-
- /* Add the table to the namespace */
-
- status = acpi_ns_load_table(table_index, parent_node);
- if (ACPI_FAILURE(status)) {
- acpi_ut_remove_reference(obj_desc);
- *ddb_handle = NULL;
- return_ACPI_STATUS(status);
- }
-
- /* Execute any module-level code that was found in the table */
-
- acpi_ex_exit_interpreter();
- if (acpi_gbl_group_module_level_code) {
- acpi_ns_exec_module_code_list();
- }
- acpi_ex_enter_interpreter();
-
- /*
- * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
- * responsible for discovering any new wake GPEs by running _PRW methods
- * that may have been loaded by this table.
- */
- status = acpi_tb_get_owner_id(table_index, &owner_id);
- if (ACPI_SUCCESS(status)) {
- acpi_ev_update_gpes(owner_id);
- }
-
+ *ddb_handle = obj_desc;
return_ACPI_STATUS(AE_OK);
}
@@ -159,16 +122,17 @@
struct acpi_namespace_node *start_node;
struct acpi_namespace_node *parameter_node = NULL;
union acpi_operand_object *ddb_handle;
- struct acpi_table_header *table;
u32 table_index;
ACPI_FUNCTION_TRACE(ex_load_table_op);
/* Find the ACPI table in the RSDT/XSDT */
+ acpi_ex_exit_interpreter();
status = acpi_tb_find_table(operand[0]->string.pointer,
operand[1]->string.pointer,
operand[2]->string.pointer, &table_index);
+ acpi_ex_enter_interpreter();
if (ACPI_FAILURE(status)) {
if (status != AE_NOT_FOUND) {
return_ACPI_STATUS(status);
@@ -197,9 +161,10 @@
* Find the node referenced by the root_path_string. This is the
* location within the namespace where the table will be loaded.
*/
- status =
- acpi_ns_get_node(start_node, operand[3]->string.pointer,
- ACPI_NS_SEARCH_PARENT, &parent_node);
+ status = acpi_ns_get_node_unlocked(start_node,
+ operand[3]->string.pointer,
+ ACPI_NS_SEARCH_PARENT,
+ &parent_node);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -219,9 +184,10 @@
/* Find the node referenced by the parameter_path_string */
- status =
- acpi_ns_get_node(start_node, operand[4]->string.pointer,
- ACPI_NS_SEARCH_PARENT, ¶meter_node);
+ status = acpi_ns_get_node_unlocked(start_node,
+ operand[4]->string.pointer,
+ ACPI_NS_SEARCH_PARENT,
+ ¶meter_node);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -229,7 +195,15 @@
/* Load the table into the namespace */
- status = acpi_ex_add_table(table_index, parent_node, &ddb_handle);
+ ACPI_INFO(("Dynamic OEM Table Load:"));
+ acpi_ex_exit_interpreter();
+ status = acpi_tb_load_table(table_index, parent_node);
+ acpi_ex_enter_interpreter();
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ status = acpi_ex_add_table(table_index, &ddb_handle);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -252,19 +226,6 @@
}
}
- status = acpi_get_table_by_index(table_index, &table);
- if (ACPI_SUCCESS(status)) {
- ACPI_INFO(("Dynamic OEM Table Load:"));
- acpi_tb_print_table_header(0, table);
- }
-
- /* Invoke table handler if present */
-
- if (acpi_gbl_table_handler) {
- (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
- acpi_gbl_table_handler_context);
- }
-
*return_desc = ddb_handle;
return_ACPI_STATUS(status);
}
@@ -475,13 +436,12 @@
/* Install the new table into the local data structures */
ACPI_INFO(("Dynamic OEM Table Load:"));
- (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
-
- status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
- ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL,
- TRUE, TRUE, &table_index);
-
- (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ acpi_ex_exit_interpreter();
+ status =
+ acpi_tb_install_and_load_table(table, ACPI_PTR_TO_PHYSADDR(table),
+ ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL,
+ TRUE, &table_index);
+ acpi_ex_enter_interpreter();
if (ACPI_FAILURE(status)) {
/* Delete allocated table buffer */
@@ -491,25 +451,13 @@
}
/*
- * Note: Now table is "INSTALLED", it must be validated before
- * loading.
- */
- status =
- acpi_tb_validate_table(&acpi_gbl_root_table_list.
- tables[table_index]);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
-
- /*
* Add the table to the namespace.
*
* Note: Load the table objects relative to the root of the namespace.
* This appears to go against the ACPI specification, but we do it for
* compatibility with other ACPI implementations.
*/
- status =
- acpi_ex_add_table(table_index, acpi_gbl_root_node, &ddb_handle);
+ status = acpi_ex_add_table(table_index, &ddb_handle);
if (ACPI_FAILURE(status)) {
/* On error, table_ptr was deallocated above */
@@ -532,14 +480,6 @@
/* Remove the reference by added by acpi_ex_store above */
acpi_ut_remove_reference(ddb_handle);
-
- /* Invoke table handler if present */
-
- if (acpi_gbl_table_handler) {
- (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
- acpi_gbl_table_handler_context);
- }
-
return_ACPI_STATUS(status);
}
@@ -592,10 +532,17 @@
table_index = table_desc->reference.value;
+ /*
+ * Release the interpreter lock so that the table lock won't have
+ * strict order requirement against it.
+ */
+ acpi_ex_exit_interpreter();
+
/* Ensure the table is still loaded */
if (!acpi_tb_is_table_loaded(table_index)) {
- return_ACPI_STATUS(AE_NOT_EXIST);
+ status = AE_NOT_EXIST;
+ goto lock_and_exit;
}
/* Invoke table handler if present */
@@ -613,16 +560,24 @@
status = acpi_tb_delete_namespace_by_owner(table_index);
if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
+ goto lock_and_exit;
}
(void)acpi_tb_release_owner_id(table_index);
acpi_tb_set_table_loaded_flag(table_index, FALSE);
+lock_and_exit:
+
+ /* Re-acquire the interpreter lock */
+
+ acpi_ex_enter_interpreter();
+
/*
* Invalidate the handle. We do this because the handle may be stored
* in a named object and may not be actually deleted until much later.
*/
- ddb_handle->common.flags &= ~AOPOBJ_DATA_VALID;
- return_ACPI_STATUS(AE_OK);
+ if (ACPI_SUCCESS(status)) {
+ ddb_handle->common.flags &= ~AOPOBJ_DATA_VALID;
+ }
+ return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index b7e9b3d..588ad14 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -124,9 +124,9 @@
* of ACPI 3.0) is that the to_integer() operator allows both decimal
* and hexadecimal strings (hex prefixed with "0x").
*/
- status = acpi_ut_strtoul64((char *)pointer, flags,
- acpi_gbl_integer_byte_width,
- &result);
+ status = acpi_ut_strtoul64(ACPI_CAST_PTR(char, pointer),
+ (acpi_gbl_integer_byte_width |
+ flags), &result);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -632,7 +632,7 @@
*/
status =
acpi_ex_convert_to_integer(source_desc, result_desc,
- 16);
+ ACPI_STRTOUL_BASE16);
break;
case ACPI_TYPE_STRING:
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 4f7e667..37c88b4 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -327,8 +327,8 @@
switch (operand0->common.type) {
case ACPI_TYPE_INTEGER:
- status =
- acpi_ex_convert_to_integer(operand1, &local_operand1, 16);
+ status = acpi_ex_convert_to_integer(operand1, &local_operand1,
+ ACPI_STRTOUL_BASE16);
break;
case ACPI_TYPE_STRING:
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index 4e17506..0073004 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -521,9 +521,10 @@
case AML_TO_INTEGER_OP: /* to_integer (Data, Result) */
+ /* Perform "explicit" conversion */
+
status =
- acpi_ex_convert_to_integer(operand[0], &return_desc,
- ACPI_ANY_BASE);
+ acpi_ex_convert_to_integer(operand[0], &return_desc, 0);
if (return_desc == operand[0]) {
/* No conversion performed, add ref to handle return value */
@@ -890,14 +891,16 @@
* Field, so we need to resolve the node to a value.
*/
status =
- acpi_ns_get_node(walk_state->scope_info->
- scope.node,
- operand[0]->string.pointer,
- ACPI_NS_SEARCH_PARENT,
- ACPI_CAST_INDIRECT_PTR
- (struct
- acpi_namespace_node,
- &return_desc));
+ acpi_ns_get_node_unlocked(walk_state->
+ scope_info->scope.
+ node,
+ operand[0]->
+ string.pointer,
+ ACPI_NS_SEARCH_PARENT,
+ ACPI_CAST_INDIRECT_PTR
+ (struct
+ acpi_namespace_node,
+ &return_desc));
if (ACPI_FAILURE(status)) {
goto cleanup;
}
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index 27b41fd..f29eba1 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -410,12 +410,13 @@
case ARGI_INTEGER:
/*
- * Need an operand of type ACPI_TYPE_INTEGER,
- * But we can implicitly convert from a STRING or BUFFER
- * aka - "Implicit Source Operand Conversion"
+ * Need an operand of type ACPI_TYPE_INTEGER, but we can
+ * implicitly convert from a STRING or BUFFER.
+ *
+ * Known as "Implicit Source Operand Conversion"
*/
- status =
- acpi_ex_convert_to_integer(obj_desc, stack_ptr, 16);
+ status = acpi_ex_convert_to_integer(obj_desc, stack_ptr,
+ ACPI_STRTOUL_BASE16);
if (ACPI_FAILURE(status)) {
if (status == AE_TYPE) {
ACPI_ERROR((AE_INFO,
diff --git a/drivers/acpi/acpica/extrace.c b/drivers/acpi/acpica/extrace.c
index b52e848..c9ca826 100644
--- a/drivers/acpi/acpica/extrace.c
+++ b/drivers/acpi/acpica/extrace.c
@@ -201,7 +201,6 @@
union acpi_operand_object *obj_desc,
struct acpi_walk_state *walk_state)
{
- acpi_status status;
char *pathname = NULL;
u8 enabled = FALSE;
@@ -211,11 +210,6 @@
pathname = acpi_ns_get_normalized_pathname(method_node, TRUE);
}
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- goto exit;
- }
-
enabled = acpi_ex_interpreter_trace_enabled(pathname);
if (enabled && !acpi_gbl_trace_method_object) {
acpi_gbl_trace_method_object = obj_desc;
@@ -233,9 +227,6 @@
}
}
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-exit:
if (enabled) {
ACPI_TRACE_POINT(ACPI_TRACE_AML_METHOD, TRUE,
obj_desc ? obj_desc->method.aml_start : NULL,
@@ -267,7 +258,6 @@
union acpi_operand_object *obj_desc,
struct acpi_walk_state *walk_state)
{
- acpi_status status;
char *pathname = NULL;
u8 enabled;
@@ -277,26 +267,14 @@
pathname = acpi_ns_get_normalized_pathname(method_node, TRUE);
}
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- goto exit_path;
- }
-
enabled = acpi_ex_interpreter_trace_enabled(NULL);
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
if (enabled) {
ACPI_TRACE_POINT(ACPI_TRACE_AML_METHOD, FALSE,
obj_desc ? obj_desc->method.aml_start : NULL,
pathname);
}
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- goto exit_path;
- }
-
/* Check whether the tracer should be stopped */
if (acpi_gbl_trace_method_object == obj_desc) {
@@ -312,9 +290,6 @@
acpi_gbl_trace_method_object = NULL;
}
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-exit_path:
if (pathname) {
ACPI_FREE(pathname);
}
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 425f133..a8b857a 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -94,6 +94,10 @@
ACPI_ERROR((AE_INFO,
"Could not acquire AML Interpreter mutex"));
}
+ status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE(status)) {
+ ACPI_ERROR((AE_INFO, "Could not acquire AML Namespace mutex"));
+ }
return_VOID;
}
@@ -127,6 +131,10 @@
ACPI_FUNCTION_TRACE(ex_exit_interpreter);
+ status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE(status)) {
+ ACPI_ERROR((AE_INFO, "Could not release AML Namespace mutex"));
+ }
status = acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
if (ACPI_FAILURE(status)) {
ACPI_ERROR((AE_INFO,
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index bdecd5e..76b0e35 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -98,7 +98,7 @@
acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u32 action)
{
struct acpi_gpe_register_info *gpe_register_info;
- acpi_status status;
+ acpi_status status = AE_OK;
u32 enable_mask;
u32 register_bit;
@@ -148,9 +148,14 @@
return (AE_BAD_PARAMETER);
}
- /* Write the updated enable mask */
+ if (!(register_bit & gpe_register_info->mask_for_run)) {
- status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address);
+ /* Write the updated enable mask */
+
+ status =
+ acpi_hw_write(enable_mask,
+ &gpe_register_info->enable_address);
+ }
return (status);
}
@@ -242,6 +247,12 @@
local_event_status |= ACPI_EVENT_FLAG_ENABLED;
}
+ /* GPE currently masked? (masked for runtime?) */
+
+ if (register_bit & gpe_register_info->mask_for_run) {
+ local_event_status |= ACPI_EVENT_FLAG_MASKED;
+ }
+
/* GPE enabled for wake? */
if (register_bit & gpe_register_info->enable_for_wake) {
@@ -397,6 +408,7 @@
u32 i;
acpi_status status;
struct acpi_gpe_register_info *gpe_register_info;
+ u8 enable_mask;
/* NOTE: assumes that all GPEs are currently disabled */
@@ -410,9 +422,10 @@
/* Enable all "runtime" GPEs in this register */
+ enable_mask = gpe_register_info->enable_for_run &
+ ~gpe_register_info->mask_for_run;
status =
- acpi_hw_gpe_enable_write(gpe_register_info->enable_for_run,
- gpe_register_info);
+ acpi_hw_gpe_enable_write(enable_mask, gpe_register_info);
if (ACPI_FAILURE(status)) {
return (status);
}
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 426a630..73f98d3 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -108,9 +108,9 @@
}
status =
- acpi_ns_lookup(NULL, (char *)init_val->name, init_val->type,
- ACPI_IMODE_LOAD_PASS2, ACPI_NS_NO_UPSEARCH,
- NULL, &new_node);
+ acpi_ns_lookup(NULL, ACPI_CAST_PTR(char, init_val->name),
+ init_val->type, ACPI_IMODE_LOAD_PASS2,
+ ACPI_NS_NO_UPSEARCH, NULL, &new_node);
if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
"Could not create predefined name %s",
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index c803bda..2b85dee 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -79,7 +79,6 @@
/* String-to-Integer conversion */
status = acpi_ut_strtoul64(original_object->string.pointer,
- ACPI_ANY_BASE,
acpi_gbl_integer_byte_width, &value);
if (ACPI_FAILURE(status)) {
return (status);
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index ce1f860..84f35dd 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -338,7 +338,7 @@
case ACPI_TYPE_STRING:
acpi_os_printf("Len %.2X ", obj_desc->string.length);
- acpi_ut_print_string(obj_desc->string.pointer, 32);
+ acpi_ut_print_string(obj_desc->string.pointer, 80);
acpi_os_printf("\n");
break;
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index b5e2b0a..334d3c5 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -46,6 +46,7 @@
#include "acnamesp.h"
#include "acdispat.h"
#include "actables.h"
+#include "acinterp.h"
#define _COMPONENT ACPI_NAMESPACE
ACPI_MODULE_NAME("nsload")
@@ -78,20 +79,6 @@
ACPI_FUNCTION_TRACE(ns_load_table);
- /*
- * Parse the table and load the namespace with all named
- * objects found within. Control methods are NOT parsed
- * at this time. In fact, the control methods cannot be
- * parsed until the entire namespace is loaded, because
- * if a control method makes a forward reference (call)
- * to another control method, we can't continue parsing
- * because we don't know how many arguments to parse next!
- */
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
-
/* If table already loaded into namespace, just return */
if (acpi_tb_is_table_loaded(table_index)) {
@@ -107,6 +94,15 @@
goto unlock;
}
+ /*
+ * Parse the table and load the namespace with all named
+ * objects found within. Control methods are NOT parsed
+ * at this time. In fact, the control methods cannot be
+ * parsed until the entire namespace is loaded, because
+ * if a control method makes a forward reference (call)
+ * to another control method, we can't continue parsing
+ * because we don't know how many arguments to parse next!
+ */
status = acpi_ns_parse_table(table_index, node);
if (ACPI_SUCCESS(status)) {
acpi_tb_set_table_loaded_flag(table_index, TRUE);
@@ -120,7 +116,6 @@
* exist. This target of Scope must already exist in the
* namespace, as per the ACPI specification.
*/
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
acpi_ns_delete_namespace_by_owner(acpi_gbl_root_table_list.
tables[table_index].owner_id);
@@ -129,8 +124,6 @@
}
unlock:
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -162,7 +155,8 @@
* other ACPI implementations. Optionally, the execution can be deferred
* until later, see acpi_initialize_objects.
*/
- if (!acpi_gbl_group_module_level_code) {
+ if (!acpi_gbl_parse_table_as_term_list
+ && !acpi_gbl_group_module_level_code) {
acpi_ns_exec_module_code_list();
}
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f631a47..4f14e92 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -47,12 +47,103 @@
#include "acparser.h"
#include "acdispat.h"
#include "actables.h"
+#include "acinterp.h"
#define _COMPONENT ACPI_NAMESPACE
ACPI_MODULE_NAME("nsparse")
/*******************************************************************************
*
+ * FUNCTION: ns_execute_table
+ *
+ * PARAMETERS: table_desc - An ACPI table descriptor for table to parse
+ * start_node - Where to enter the table into the namespace
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Load ACPI/AML table by executing the entire table as a
+ * term_list.
+ *
+ ******************************************************************************/
+acpi_status
+acpi_ns_execute_table(u32 table_index, struct acpi_namespace_node *start_node)
+{
+ acpi_status status;
+ struct acpi_table_header *table;
+ acpi_owner_id owner_id;
+ struct acpi_evaluate_info *info = NULL;
+ u32 aml_length;
+ u8 *aml_start;
+ union acpi_operand_object *method_obj = NULL;
+
+ ACPI_FUNCTION_TRACE(ns_execute_table);
+
+ status = acpi_get_table_by_index(table_index, &table);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Table must consist of at least a complete header */
+
+ if (table->length < sizeof(struct acpi_table_header)) {
+ return_ACPI_STATUS(AE_BAD_HEADER);
+ }
+
+ aml_start = (u8 *)table + sizeof(struct acpi_table_header);
+ aml_length = table->length - sizeof(struct acpi_table_header);
+
+ status = acpi_tb_get_owner_id(table_index, &owner_id);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Create, initialize, and link a new temporary method object */
+
+ method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD);
+ if (!method_obj) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ /* Allocate the evaluation information block */
+
+ info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
+ if (!info) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
+ "Create table code block: %p\n", method_obj));
+
+ method_obj->method.aml_start = aml_start;
+ method_obj->method.aml_length = aml_length;
+ method_obj->method.owner_id = owner_id;
+ method_obj->method.info_flags |= ACPI_METHOD_MODULE_LEVEL;
+
+ info->pass_number = ACPI_IMODE_EXECUTE;
+ info->node = start_node;
+ info->obj_desc = method_obj;
+ info->node_flags = info->node->flags;
+ info->full_pathname = acpi_ns_get_normalized_pathname(info->node, TRUE);
+ if (!info->full_pathname) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ status = acpi_ps_execute_table(info);
+
+cleanup:
+ if (info) {
+ ACPI_FREE(info->full_pathname);
+ info->full_pathname = NULL;
+ }
+ ACPI_FREE(info);
+ acpi_ut_remove_reference(method_obj);
+ return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
* FUNCTION: ns_one_complete_parse
*
* PARAMETERS: pass_number - 1 or 2
@@ -63,6 +154,7 @@
* DESCRIPTION: Perform one complete parse of an ACPI/AML table.
*
******************************************************************************/
+
acpi_status
acpi_ns_one_complete_parse(u32 pass_number,
u32 table_index,
@@ -143,7 +235,9 @@
ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
"*PARSE* pass %u parse\n", pass_number));
+ acpi_ex_enter_interpreter();
status = acpi_ps_parse_aml(walk_state);
+ acpi_ex_exit_interpreter();
cleanup:
acpi_ps_delete_parse_tree(parse_root);
@@ -170,38 +264,47 @@
ACPI_FUNCTION_TRACE(ns_parse_table);
- /*
- * AML Parse, pass 1
- *
- * In this pass, we load most of the namespace. Control methods
- * are not parsed until later. A parse tree is not created. Instead,
- * each Parser Op subtree is deleted when it is finished. This saves
- * a great deal of memory, and allows a small cache of parse objects
- * to service the entire parse. The second pass of the parse then
- * performs another complete parse of the AML.
- */
- ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 1\n"));
+ if (acpi_gbl_parse_table_as_term_list) {
+ ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start load pass\n"));
- status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
- table_index, start_node);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
+ status = acpi_ns_execute_table(table_index, start_node);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+ } else {
+ /*
+ * AML Parse, pass 1
+ *
+ * In this pass, we load most of the namespace. Control methods
+ * are not parsed until later. A parse tree is not created.
+ * Instead, each Parser Op subtree is deleted when it is finished.
+ * This saves a great deal of memory, and allows a small cache of
+ * parse objects to service the entire parse. The second pass of
+ * the parse then performs another complete parse of the AML.
+ */
+ ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 1\n"));
- /*
- * AML Parse, pass 2
- *
- * In this pass, we resolve forward references and other things
- * that could not be completed during the first pass.
- * Another complete parse of the AML is performed, but the
- * overhead of this is compensated for by the fact that the
- * parse objects are all cached.
- */
- ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 2\n"));
- status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
- table_index, start_node);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
+ status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
+ table_index, start_node);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /*
+ * AML Parse, pass 2
+ *
+ * In this pass, we resolve forward references and other things
+ * that could not be completed during the first pass.
+ * Another complete parse of the AML is performed, but the
+ * overhead of this is compensated for by the fact that the
+ * parse objects are all cached.
+ */
+ ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 2\n"));
+ status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
+ table_index, start_node);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
}
return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 784a30b..691814d 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -662,7 +662,7 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ns_get_node
+ * FUNCTION: acpi_ns_get_node_unlocked
*
* PARAMETERS: *pathname - Name to be found, in external (ASL) format. The
* \ (backslash) and ^ (carat) prefixes, and the
@@ -678,20 +678,21 @@
* DESCRIPTION: Look up a name relative to a given scope and return the
* corresponding Node. NOTE: Scope can be null.
*
- * MUTEX: Locks namespace
+ * MUTEX: Doesn't locks namespace
*
******************************************************************************/
acpi_status
-acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
- const char *pathname,
- u32 flags, struct acpi_namespace_node **return_node)
+acpi_ns_get_node_unlocked(struct acpi_namespace_node *prefix_node,
+ const char *pathname,
+ u32 flags, struct acpi_namespace_node **return_node)
{
union acpi_generic_state scope_info;
acpi_status status;
char *internal_path;
- ACPI_FUNCTION_TRACE_PTR(ns_get_node, ACPI_CAST_PTR(char, pathname));
+ ACPI_FUNCTION_TRACE_PTR(ns_get_node_unlocked,
+ ACPI_CAST_PTR(char, pathname));
/* Simplest case is a null pathname */
@@ -718,13 +719,6 @@
return_ACPI_STATUS(status);
}
- /* Must lock namespace during lookup */
-
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- goto cleanup;
- }
-
/* Setup lookup scope (search starting point) */
scope_info.scope.node = prefix_node;
@@ -740,9 +734,49 @@
pathname, acpi_format_exception(status)));
}
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-cleanup:
ACPI_FREE(internal_path);
return_ACPI_STATUS(status);
}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ns_get_node
+ *
+ * PARAMETERS: *pathname - Name to be found, in external (ASL) format. The
+ * \ (backslash) and ^ (carat) prefixes, and the
+ * . (period) to separate segments are supported.
+ * prefix_node - Root of subtree to be searched, or NS_ALL for the
+ * root of the name space. If Name is fully
+ * qualified (first s8 is '\'), the passed value
+ * of Scope will not be accessed.
+ * flags - Used to indicate whether to perform upsearch or
+ * not.
+ * return_node - Where the Node is returned
+ *
+ * DESCRIPTION: Look up a name relative to a given scope and return the
+ * corresponding Node. NOTE: Scope can be null.
+ *
+ * MUTEX: Locks namespace
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
+ const char *pathname,
+ u32 flags, struct acpi_namespace_node **return_node)
+{
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE_PTR(ns_get_node, ACPI_CAST_PTR(char, pathname));
+
+ status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ status = acpi_ns_get_node_unlocked(prefix_node, pathname,
+ flags, return_node);
+
+ (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS(status);
+}
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 0a23897..1ce26d9 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -537,9 +537,11 @@
/* Either the method parse or actual execution failed */
+ acpi_ex_exit_interpreter();
ACPI_ERROR_METHOD("Method parse/execution failed",
walk_state->method_node, NULL,
status);
+ acpi_ex_enter_interpreter();
/* Check for possible multi-thread reentrancy problem */
@@ -571,7 +573,9 @@
* cleanup to do
*/
if (((walk_state->parse_flags & ACPI_PARSE_MODE_MASK) ==
- ACPI_PARSE_EXECUTE) || (ACPI_FAILURE(status))) {
+ ACPI_PARSE_EXECUTE &&
+ !(walk_state->parse_flags & ACPI_PARSE_MODULE_LEVEL)) ||
+ (ACPI_FAILURE(status))) {
acpi_ds_terminate_control_method(walk_state->
method_desc,
walk_state);
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index cf30cd82..f3c8726 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -252,6 +252,90 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_ps_execute_table
+ *
+ * PARAMETERS: info - Method info block, contains:
+ * node - Node to where the is entered into the
+ * namespace
+ * obj_desc - Pseudo method object describing the AML
+ * code of the entire table
+ * pass_number - Parse or execute pass
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Execute a table
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ps_execute_table(struct acpi_evaluate_info *info)
+{
+ acpi_status status;
+ union acpi_parse_object *op = NULL;
+ struct acpi_walk_state *walk_state = NULL;
+
+ ACPI_FUNCTION_TRACE(ps_execute_table);
+
+ /* Create and init a Root Node */
+
+ op = acpi_ps_create_scope_op(info->obj_desc->method.aml_start);
+ if (!op) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ /* Create and initialize a new walk state */
+
+ walk_state =
+ acpi_ds_create_walk_state(info->obj_desc->method.owner_id, NULL,
+ NULL, NULL);
+ if (!walk_state) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ status = acpi_ds_init_aml_walk(walk_state, op, info->node,
+ info->obj_desc->method.aml_start,
+ info->obj_desc->method.aml_length, info,
+ info->pass_number);
+ if (ACPI_FAILURE(status)) {
+ goto cleanup;
+ }
+
+ if (info->obj_desc->method.info_flags & ACPI_METHOD_MODULE_LEVEL) {
+ walk_state->parse_flags |= ACPI_PARSE_MODULE_LEVEL;
+ }
+
+ /* Info->Node is the default location to load the table */
+
+ if (info->node && info->node != acpi_gbl_root_node) {
+ status =
+ acpi_ds_scope_stack_push(info->node, ACPI_TYPE_METHOD,
+ walk_state);
+ if (ACPI_FAILURE(status)) {
+ goto cleanup;
+ }
+ }
+
+ /*
+ * Parse the AML, walk_state will be deleted by parse_aml
+ */
+ acpi_ex_enter_interpreter();
+ status = acpi_ps_parse_aml(walk_state);
+ acpi_ex_exit_interpreter();
+ walk_state = NULL;
+
+cleanup:
+ if (walk_state) {
+ acpi_ds_delete_walk_state(walk_state);
+ }
+ if (op) {
+ acpi_ps_delete_parse_tree(op);
+ }
+ return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ps_update_parameter_list
*
* PARAMETERS: info - See struct acpi_evaluate_info
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 1388a19..d9ca8c2 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -45,6 +45,7 @@
#include "accommon.h"
#include "acnamesp.h"
#include "actables.h"
+#include "acevents.h"
#define _COMPONENT ACPI_TABLES
ACPI_MODULE_NAME("tbdata")
@@ -613,17 +614,12 @@
* lock may block, and also since the execution of a namespace walk
* must be allowed to use the interpreter.
*/
- (void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
status = acpi_ut_acquire_write_lock(&acpi_gbl_namespace_rw_lock);
-
- acpi_ns_delete_namespace_by_owner(owner_id);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
-
+ acpi_ns_delete_namespace_by_owner(owner_id);
acpi_ut_release_write_lock(&acpi_gbl_namespace_rw_lock);
-
- status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
return_ACPI_STATUS(status);
}
@@ -771,3 +767,142 @@
(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_tb_load_table
+ *
+ * PARAMETERS: table_index - Table index
+ * parent_node - Where table index is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Load an ACPI table
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_tb_load_table(u32 table_index, struct acpi_namespace_node *parent_node)
+{
+ struct acpi_table_header *table;
+ acpi_status status;
+ acpi_owner_id owner_id;
+
+ ACPI_FUNCTION_TRACE(tb_load_table);
+
+ /*
+ * Note: Now table is "INSTALLED", it must be validated before
+ * using.
+ */
+ status = acpi_get_table_by_index(table_index, &table);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ status = acpi_ns_load_table(table_index, parent_node);
+
+ /* Execute any module-level code that was found in the table */
+
+ if (!acpi_gbl_parse_table_as_term_list
+ && acpi_gbl_group_module_level_code) {
+ acpi_ns_exec_module_code_list();
+ }
+
+ /*
+ * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
+ * responsible for discovering any new wake GPEs by running _PRW methods
+ * that may have been loaded by this table.
+ */
+ status = acpi_tb_get_owner_id(table_index, &owner_id);
+ if (ACPI_SUCCESS(status)) {
+ acpi_ev_update_gpes(owner_id);
+ }
+
+ /* Invoke table handler if present */
+
+ if (acpi_gbl_table_handler) {
+ (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
+ acpi_gbl_table_handler_context);
+ }
+
+ return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_tb_install_and_load_table
+ *
+ * PARAMETERS: table - Pointer to the table
+ * address - Physical address of the table
+ * flags - Allocation flags of the table
+ * table_index - Where table index is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Install and load an ACPI table
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_tb_install_and_load_table(struct acpi_table_header *table,
+ acpi_physical_address address,
+ u8 flags, u8 override, u32 *table_index)
+{
+ acpi_status status;
+ u32 i;
+ acpi_owner_id owner_id;
+
+ ACPI_FUNCTION_TRACE(acpi_load_table);
+
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+ /* Install the table and load it into the namespace */
+
+ status = acpi_tb_install_standard_table(address, flags, TRUE,
+ override, &i);
+ if (ACPI_FAILURE(status)) {
+ goto unlock_and_exit;
+ }
+
+ /*
+ * Note: Now table is "INSTALLED", it must be validated before
+ * using.
+ */
+ status = acpi_tb_validate_table(&acpi_gbl_root_table_list.tables[i]);
+ if (ACPI_FAILURE(status)) {
+ goto unlock_and_exit;
+ }
+
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ status = acpi_ns_load_table(i, acpi_gbl_root_node);
+
+ /* Execute any module-level code that was found in the table */
+
+ if (!acpi_gbl_parse_table_as_term_list
+ && acpi_gbl_group_module_level_code) {
+ acpi_ns_exec_module_code_list();
+ }
+
+ /*
+ * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
+ * responsible for discovering any new wake GPEs by running _PRW methods
+ * that may have been loaded by this table.
+ */
+ status = acpi_tb_get_owner_id(i, &owner_id);
+ if (ACPI_SUCCESS(status)) {
+ acpi_ev_update_gpes(owner_id);
+ }
+
+ /* Invoke table handler if present */
+
+ if (acpi_gbl_table_handler) {
+ (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
+ acpi_gbl_table_handler_context);
+ }
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+unlock_and_exit:
+ *table_index = i;
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(status);
+}
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 6208069..046c4d0 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -344,23 +344,27 @@
/* Obtain the DSDT and FACS tables via their addresses within the FADT */
- acpi_tb_install_fixed_table((acpi_physical_address)acpi_gbl_FADT.Xdsdt,
- ACPI_SIG_DSDT, &acpi_gbl_dsdt_index);
+ acpi_tb_install_standard_table((acpi_physical_address)acpi_gbl_FADT.
+ Xdsdt,
+ ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+ FALSE, TRUE, &acpi_gbl_dsdt_index);
/* If Hardware Reduced flag is set, there is no FACS */
if (!acpi_gbl_reduced_hardware) {
if (acpi_gbl_FADT.facs) {
- acpi_tb_install_fixed_table((acpi_physical_address)
- acpi_gbl_FADT.facs,
- ACPI_SIG_FACS,
- &acpi_gbl_facs_index);
+ acpi_tb_install_standard_table((acpi_physical_address)
+ acpi_gbl_FADT.facs,
+ ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+ FALSE, TRUE,
+ &acpi_gbl_facs_index);
}
if (acpi_gbl_FADT.Xfacs) {
- acpi_tb_install_fixed_table((acpi_physical_address)
- acpi_gbl_FADT.Xfacs,
- ACPI_SIG_FACS,
- &acpi_gbl_xfacs_index);
+ acpi_tb_install_standard_table((acpi_physical_address)
+ acpi_gbl_FADT.Xfacs,
+ ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+ FALSE, TRUE,
+ &acpi_gbl_xfacs_index);
}
}
}
@@ -476,17 +480,19 @@
u32 i;
/*
- * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which
+ * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which
* should be zero are indeed zero. This will workaround BIOSs that
* inadvertently place values in these fields.
*
* The ACPI 1.0 reserved fields that will be zeroed are the bytes located
* at offset 45, 55, 95, and the word located at offset 109, 110.
*
- * Note: The FADT revision value is unreliable. Only the length can be
- * trusted.
+ * Note: The FADT revision value is unreliable because of BIOS errors.
+ * The table length is instead used as the final word on the version.
+ *
+ * Note: FADT revision 3 is the ACPI 2.0 version of the FADT.
*/
- if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) {
+ if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) {
acpi_gbl_FADT.preferred_profile = 0;
acpi_gbl_FADT.pstate_control = 0;
acpi_gbl_FADT.cst_control = 0;
@@ -552,76 +558,72 @@
*
* Address32 zero, Address64 [don't care] - Use Address64
*
+ * No override: if acpi_gbl_use32_bit_fadt_addresses is FALSE, and:
* Address32 non-zero, Address64 zero - Copy/use Address32
* Address32 non-zero == Address64 non-zero - Use Address64
* Address32 non-zero != Address64 non-zero - Warning, use Address64
*
* Override: if acpi_gbl_use32_bit_fadt_addresses is TRUE, and:
+ * Address32 non-zero, Address64 zero - Copy/use Address32
+ * Address32 non-zero == Address64 non-zero - Copy/use Address32
* Address32 non-zero != Address64 non-zero - Warning, copy/use Address32
*
* Note: space_id is always I/O for 32-bit legacy address fields
*/
if (address32) {
- if (!address64->address) {
+ if (address64->address) {
+ if (address64->address != (u64)address32) {
- /* 64-bit address is zero, use 32-bit address */
+ /* Address mismatch */
- acpi_tb_init_generic_address(address64,
- ACPI_ADR_SPACE_SYSTEM_IO,
- *ACPI_ADD_PTR(u8,
- &acpi_gbl_FADT,
- fadt_info_table
- [i].
- length),
- (u64)address32,
- name, flags);
- } else if (address64->address != (u64)address32) {
+ ACPI_BIOS_WARNING((AE_INFO,
+ "32/64X address mismatch in FADT/%s: "
+ "0x%8.8X/0x%8.8X%8.8X, using %u-bit address",
+ name, address32,
+ ACPI_FORMAT_UINT64
+ (address64->address),
+ acpi_gbl_use32_bit_fadt_addresses
+ ? 32 : 64));
+ }
- /* Address mismatch */
-
- ACPI_BIOS_WARNING((AE_INFO,
- "32/64X address mismatch in FADT/%s: "
- "0x%8.8X/0x%8.8X%8.8X, using %u-bit address",
- name, address32,
- ACPI_FORMAT_UINT64
- (address64->address),
- acpi_gbl_use32_bit_fadt_addresses
- ? 32 : 64));
-
- if (acpi_gbl_use32_bit_fadt_addresses) {
-
- /* 32-bit address override */
-
- acpi_tb_init_generic_address(address64,
- ACPI_ADR_SPACE_SYSTEM_IO,
- *ACPI_ADD_PTR
- (u8,
- &acpi_gbl_FADT,
- fadt_info_table
- [i].
- length),
- (u64)
- address32,
- name,
- flags);
+ /*
+ * For each extended field, check for length mismatch
+ * between the legacy length field and the corresponding
+ * 64-bit X length field.
+ * Note: If the legacy length field is > 0xFF bits, ignore
+ * this check. (GPE registers can be larger than the
+ * 64-bit GAS structure can accomodate, 0xFF bits).
+ */
+ if ((ACPI_MUL_8(length) <= ACPI_UINT8_MAX) &&
+ (address64->bit_width !=
+ ACPI_MUL_8(length))) {
+ ACPI_BIOS_WARNING((AE_INFO,
+ "32/64X length mismatch in FADT/%s: %u/%u",
+ name,
+ ACPI_MUL_8(length),
+ address64->
+ bit_width));
}
}
- }
- /*
- * For each extended field, check for length mismatch between the
- * legacy length field and the corresponding 64-bit X length field.
- * Note: If the legacy length field is > 0xFF bits, ignore this
- * check. (GPE registers can be larger than the 64-bit GAS structure
- * can accomodate, 0xFF bits).
- */
- if (address64->address &&
- (ACPI_MUL_8(length) <= ACPI_UINT8_MAX) &&
- (address64->bit_width != ACPI_MUL_8(length))) {
- ACPI_BIOS_WARNING((AE_INFO,
- "32/64X length mismatch in FADT/%s: %u/%u",
- name, ACPI_MUL_8(length),
- address64->bit_width));
+ /*
+ * Hardware register access code always uses the 64-bit fields.
+ * So if the 64-bit field is zero or is to be overridden,
+ * initialize it with the 32-bit fields.
+ * Note that when the 32-bit address favor is specified, the
+ * 64-bit fields are always re-initialized so that
+ * access_size/bit_width/bit_offset fields can be correctly
+ * configured to the values to trigger a 32-bit compatible
+ * access mode in the hardware register access code.
+ */
+ if (!address64->address
+ || acpi_gbl_use32_bit_fadt_addresses) {
+ acpi_tb_init_generic_address(address64,
+ ACPI_ADR_SPACE_SYSTEM_IO,
+ length,
+ (u64)address32,
+ name, flags);
+ }
}
if (fadt_info_table[i].flags & ACPI_FADT_REQUIRED) {
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index e348d61..f6b9b4e 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -68,7 +68,7 @@
acpi_tb_find_table(char *signature,
char *oem_id, char *oem_table_id, u32 *table_index)
{
- acpi_status status;
+ acpi_status status = AE_OK;
struct acpi_table_header header;
u32 i;
@@ -96,6 +96,7 @@
/* Search for the table */
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature),
header.signature, ACPI_NAME_SIZE)) {
@@ -115,7 +116,7 @@
acpi_tb_validate_table(&acpi_gbl_root_table_list.
tables[i]);
if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
+ goto unlock_and_exit;
}
if (!acpi_gbl_root_table_list.tables[i].pointer) {
@@ -144,9 +145,12 @@
ACPI_DEBUG_PRINT((ACPI_DB_TABLES,
"Found table [%4.4s]\n",
header.signature));
- return_ACPI_STATUS(AE_OK);
+ goto unlock_and_exit;
}
}
+ status = AE_NOT_FOUND;
- return_ACPI_STATUS(AE_NOT_FOUND);
+unlock_and_exit:
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 8b13052..5fdf251 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -157,68 +157,6 @@
/*******************************************************************************
*
- * FUNCTION: acpi_tb_install_fixed_table
- *
- * PARAMETERS: address - Physical address of DSDT or FACS
- * signature - Table signature, NULL if no need to
- * match
- * table_index - Where the table index is returned
- *
- * RETURN: Status
- *
- * DESCRIPTION: Install a fixed ACPI table (DSDT/FACS) into the global data
- * structure.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_tb_install_fixed_table(acpi_physical_address address,
- char *signature, u32 *table_index)
-{
- struct acpi_table_desc new_table_desc;
- acpi_status status;
-
- ACPI_FUNCTION_TRACE(tb_install_fixed_table);
-
- if (!address) {
- ACPI_ERROR((AE_INFO,
- "Null physical address for ACPI table [%s]",
- signature));
- return (AE_NO_MEMORY);
- }
-
- /* Fill a table descriptor for validation */
-
- status = acpi_tb_acquire_temp_table(&new_table_desc, address,
- ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL);
- if (ACPI_FAILURE(status)) {
- ACPI_ERROR((AE_INFO,
- "Could not acquire table length at %8.8X%8.8X",
- ACPI_FORMAT_UINT64(address)));
- return_ACPI_STATUS(status);
- }
-
- /* Validate and verify a table before installation */
-
- status = acpi_tb_verify_temp_table(&new_table_desc, signature);
- if (ACPI_FAILURE(status)) {
- goto release_and_exit;
- }
-
- /* Add the table to the global root table list */
-
- acpi_tb_install_table_with_override(&new_table_desc, TRUE, table_index);
-
-release_and_exit:
-
- /* Release the temporary table descriptor */
-
- acpi_tb_release_temp_table(&new_table_desc);
- return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
* FUNCTION: acpi_tb_install_standard_table
*
* PARAMETERS: address - Address of the table (might be a virtual
@@ -230,8 +168,7 @@
*
* RETURN: Status
*
- * DESCRIPTION: This function is called to install an ACPI table that is
- * neither DSDT nor FACS (a "standard" table.)
+ * DESCRIPTION: This function is called to verify and install an ACPI table.
* When this function is called by "Load" or "LoadTable" opcodes,
* or by acpi_load_table() API, the "Reload" parameter is set.
* After sucessfully returning from this function, table is
@@ -364,6 +301,14 @@
acpi_tb_install_table_with_override(&new_table_desc, override,
table_index);
+ /* Invoke table handler if present */
+
+ if (acpi_gbl_table_handler) {
+ (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_INSTALL,
+ new_table_desc.pointer,
+ acpi_gbl_table_handler_context);
+ }
+
release_and_exit:
/* Release the temporary table descriptor */
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index e285539..51eb07c 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -252,7 +252,8 @@
*
******************************************************************************/
-acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
+acpi_status ACPI_INIT_FUNCTION
+acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
{
struct acpi_table_rsdp *rsdp;
u32 table_entry_size;
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 3ecec93..4ab6b9c 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -98,7 +98,7 @@
*
******************************************************************************/
-acpi_status __init
+acpi_status ACPI_INIT_FUNCTION
acpi_initialize_tables(struct acpi_table_desc *initial_table_array,
u32 initial_table_count, u8 allow_resize)
{
@@ -164,7 +164,7 @@
* kernel.
*
******************************************************************************/
-acpi_status __init acpi_reallocate_root_table(void)
+acpi_status ACPI_INIT_FUNCTION acpi_reallocate_root_table(void)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index ac71abc..5569f63 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -63,7 +63,7 @@
* DESCRIPTION: Load the ACPI tables from the RSDT/XSDT
*
******************************************************************************/
-acpi_status __init acpi_load_tables(void)
+acpi_status ACPI_INIT_FUNCTION acpi_load_tables(void)
{
acpi_status status;
@@ -103,7 +103,8 @@
"While loading namespace from ACPI tables"));
}
- if (!acpi_gbl_group_module_level_code) {
+ if (acpi_gbl_parse_table_as_term_list
+ || !acpi_gbl_group_module_level_code) {
/*
* Initialize the objects that remain uninitialized. This
* runs the executable AML that may be part of the
@@ -188,11 +189,11 @@
memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT,
sizeof(struct acpi_table_header));
- (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
-
/* Load and parse tables */
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
status = acpi_ns_load_table(acpi_gbl_dsdt_index, acpi_gbl_root_node);
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status, "[DSDT] table load failed"));
tables_failed++;
@@ -202,7 +203,6 @@
/* Load any SSDT or PSDT tables. Note: Loop leaves tables locked */
- (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
table = &acpi_gbl_root_table_list.tables[i];
@@ -220,6 +220,7 @@
(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
status = acpi_ns_load_table(i, acpi_gbl_root_node);
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
"(%4.4s:%8.8s) while loading table",
@@ -235,8 +236,6 @@
} else {
tables_loaded++;
}
-
- (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
}
if (!tables_failed) {
@@ -272,7 +271,7 @@
*
******************************************************************************/
-acpi_status __init
+acpi_status ACPI_INIT_FUNCTION
acpi_install_table(acpi_physical_address address, u8 physical)
{
acpi_status status;
@@ -324,49 +323,13 @@
return_ACPI_STATUS(AE_BAD_PARAMETER);
}
- /* Must acquire the interpreter lock during this operation */
-
- status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
-
/* Install the table and load it into the namespace */
ACPI_INFO(("Host-directed Dynamic ACPI Table Load:"));
- (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
-
- status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
- ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL,
- TRUE, FALSE, &table_index);
-
- (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
- if (ACPI_FAILURE(status)) {
- goto unlock_and_exit;
- }
-
- /*
- * Note: Now table is "INSTALLED", it must be validated before
- * using.
- */
status =
- acpi_tb_validate_table(&acpi_gbl_root_table_list.
- tables[table_index]);
- if (ACPI_FAILURE(status)) {
- goto unlock_and_exit;
- }
-
- status = acpi_ns_load_table(table_index, acpi_gbl_root_node);
-
- /* Invoke table handler if present */
-
- if (acpi_gbl_table_handler) {
- (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
- acpi_gbl_table_handler_context);
- }
-
-unlock_and_exit:
- (void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
+ acpi_tb_install_and_load_table(table, ACPI_PTR_TO_PHYSADDR(table),
+ ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL,
+ FALSE, &table_index);
return_ACPI_STATUS(status);
}
@@ -415,9 +378,9 @@
return_ACPI_STATUS(AE_TYPE);
}
- /* Must acquire the interpreter lock during this operation */
+ /* Must acquire the table lock during this operation */
- status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
+ status = acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
@@ -444,8 +407,10 @@
/* Ensure the table is actually loaded */
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
if (!acpi_tb_is_table_loaded(i)) {
status = AE_NOT_EXIST;
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
break;
}
@@ -471,10 +436,11 @@
status = acpi_tb_release_owner_id(i);
acpi_tb_set_table_loaded_flag(i, FALSE);
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
break;
}
- (void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index adb6cfc..0adb1c7 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -142,7 +142,8 @@
*
******************************************************************************/
-acpi_status __init acpi_find_root_pointer(acpi_physical_address *table_address)
+acpi_status ACPI_INIT_FUNCTION
+acpi_find_root_pointer(acpi_physical_address *table_address)
{
u8 *table_ptr;
u8 *mem_rover;
@@ -244,6 +245,8 @@
return_ACPI_STATUS(AE_NOT_FOUND);
}
+ACPI_EXPORT_SYMBOL_INIT(acpi_find_root_pointer)
+
/*******************************************************************************
*
* FUNCTION: acpi_tb_scan_memory_for_rsdp
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index c986ec6..433d822 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -77,7 +77,6 @@
u32 length, struct acpi_namespace_node *region_node)
{
struct acpi_address_range *range_info;
- acpi_status status;
ACPI_FUNCTION_TRACE(ut_add_address_range);
@@ -97,12 +96,6 @@
range_info->end_address = (address + length - 1);
range_info->region_node = region_node;
- status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE(status)) {
- ACPI_FREE(range_info);
- return_ACPI_STATUS(status);
- }
-
range_info->next = acpi_gbl_address_range_list[space_id];
acpi_gbl_address_range_list[space_id] = range_info;
@@ -112,7 +105,6 @@
ACPI_FORMAT_UINT64(address),
ACPI_FORMAT_UINT64(range_info->end_address)));
- (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
return_ACPI_STATUS(AE_OK);
}
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index bd31faf..ff29812 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -239,8 +239,7 @@
u8 buf_char;
if (!buffer) {
- acpi_ut_file_printf(file,
- "Null Buffer Pointer in DumpBuffer!\n");
+ fprintf(file, "Null Buffer Pointer in DumpBuffer!\n");
return;
}
@@ -254,7 +253,7 @@
/* Print current offset */
- acpi_ut_file_printf(file, "%6.4X: ", (base_offset + i));
+ fprintf(file, "%6.4X: ", (base_offset + i));
/* Print 16 hex chars */
@@ -263,8 +262,7 @@
/* Dump fill spaces */
- acpi_ut_file_printf(file, "%*s",
- ((display * 2) + 1), " ");
+ fprintf(file, "%*s", ((display * 2) + 1), " ");
j += display;
continue;
}
@@ -273,34 +271,34 @@
case DB_BYTE_DISPLAY:
default: /* Default is BYTE display */
- acpi_ut_file_printf(file, "%02X ",
- buffer[(acpi_size)i + j]);
+ fprintf(file, "%02X ",
+ buffer[(acpi_size)i + j]);
break;
case DB_WORD_DISPLAY:
ACPI_MOVE_16_TO_32(&temp32,
&buffer[(acpi_size)i + j]);
- acpi_ut_file_printf(file, "%04X ", temp32);
+ fprintf(file, "%04X ", temp32);
break;
case DB_DWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
&buffer[(acpi_size)i + j]);
- acpi_ut_file_printf(file, "%08X ", temp32);
+ fprintf(file, "%08X ", temp32);
break;
case DB_QWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
&buffer[(acpi_size)i + j]);
- acpi_ut_file_printf(file, "%08X", temp32);
+ fprintf(file, "%08X", temp32);
ACPI_MOVE_32_TO_32(&temp32,
&buffer[(acpi_size)i + j +
4]);
- acpi_ut_file_printf(file, "%08X ", temp32);
+ fprintf(file, "%08X ", temp32);
break;
}
@@ -311,24 +309,24 @@
* Print the ASCII equivalent characters but watch out for the bad
* unprintable ones (printable chars are 0x20 through 0x7E)
*/
- acpi_ut_file_printf(file, " ");
+ fprintf(file, " ");
for (j = 0; j < 16; j++) {
if (i + j >= count) {
- acpi_ut_file_printf(file, "\n");
+ fprintf(file, "\n");
return;
}
buf_char = buffer[(acpi_size)i + j];
if (isprint(buf_char)) {
- acpi_ut_file_printf(file, "%c", buf_char);
+ fprintf(file, "%c", buf_char);
} else {
- acpi_ut_file_printf(file, ".");
+ fprintf(file, ".");
}
}
/* Done with that line. */
- acpi_ut_file_printf(file, "\n");
+ fprintf(file, "\n");
i += 16;
}
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 5744222..044df9b 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -562,6 +562,43 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_ut_str_exit
+ *
+ * PARAMETERS: line_number - Caller's line number
+ * function_name - Caller's procedure name
+ * module_name - Caller's module name
+ * component_id - Caller's component ID
+ * string - String to display
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Function exit trace. Prints only if TRACE_FUNCTIONS bit is
+ * set in debug_level. Prints exit value also.
+ *
+ ******************************************************************************/
+
+void
+acpi_ut_str_exit(u32 line_number,
+ const char *function_name,
+ const char *module_name, u32 component_id, const char *string)
+{
+
+ /* Check if enabled up-front for performance */
+
+ if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
+ acpi_debug_print(ACPI_LV_FUNCTIONS,
+ line_number, function_name, module_name,
+ component_id, "%s %s\n",
+ acpi_gbl_function_exit_prefix, string);
+ }
+
+ if (acpi_gbl_nesting_level) {
+ acpi_gbl_nesting_level--;
+ }
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_trace_point
*
* PARAMETERS: type - Trace event type
@@ -591,27 +628,3 @@
ACPI_EXPORT_SYMBOL(acpi_trace_point)
#endif
-#ifdef ACPI_APPLICATION
-/*******************************************************************************
- *
- * FUNCTION: acpi_log_error
- *
- * PARAMETERS: format - Printf format field
- * ... - Optional printf arguments
- *
- * RETURN: None
- *
- * DESCRIPTION: Print error message to the console, used by applications.
- *
- ******************************************************************************/
-void ACPI_INTERNAL_VAR_XFACE acpi_log_error(const char *format, ...)
-{
- va_list args;
-
- va_start(args, format);
- (void)acpi_ut_file_vprintf(ACPI_FILE_ERR, format, args);
- va_end(args);
-}
-
-ACPI_EXPORT_SYMBOL(acpi_log_error)
-#endif
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index efd7988..15728ad 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -253,7 +253,7 @@
return_PTR("Invalid object");
}
- return_PTR(acpi_ut_get_type_name(obj_desc->common.type));
+ return_STR(acpi_ut_get_type_name(obj_desc->common.type));
}
/*******************************************************************************
diff --git a/drivers/acpi/acpica/uthex.c b/drivers/acpi/acpica/uthex.c
index 4354fb8..36d2fc7 100644
--- a/drivers/acpi/acpica/uthex.c
+++ b/drivers/acpi/acpica/uthex.c
@@ -75,9 +75,41 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_ut_ascii_to_hex_byte
+ *
+ * PARAMETERS: two_ascii_chars - Pointer to two ASCII characters
+ * return_byte - Where converted byte is returned
+ *
+ * RETURN: Status and converted hex byte
+ *
+ * DESCRIPTION: Perform ascii-to-hex translation, exactly two ASCII characters
+ * to a single converted byte value.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ut_ascii_to_hex_byte(char *two_ascii_chars, u8 *return_byte)
+{
+
+ /* Both ASCII characters must be valid hex digits */
+
+ if (!isxdigit((int)two_ascii_chars[0]) ||
+ !isxdigit((int)two_ascii_chars[1])) {
+ return (AE_BAD_HEX_CONSTANT);
+ }
+
+ *return_byte =
+ acpi_ut_ascii_char_to_hex(two_ascii_chars[1]) |
+ (acpi_ut_ascii_char_to_hex(two_ascii_chars[0]) << 4);
+
+ return (AE_OK);
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ut_ascii_char_to_hex
*
- * PARAMETERS: hex_char - Hex character in Ascii
+ * PARAMETERS: hex_char - Hex character in Ascii. Must be:
+ * 0-9 or A-F or a-f
*
* RETURN: The binary value of the ascii/hex character
*
@@ -88,13 +120,19 @@
u8 acpi_ut_ascii_char_to_hex(int hex_char)
{
- if (hex_char <= 0x39) {
- return ((u8)(hex_char - 0x30));
+ /* Values 0-9 */
+
+ if (hex_char <= '9') {
+ return ((u8)(hex_char - '0'));
}
- if (hex_char <= 0x46) {
+ /* Upper case A-F */
+
+ if (hex_char <= 'F') {
return ((u8)(hex_char - 0x37));
}
+ /* Lower case a-f */
+
return ((u8)(hex_char - 0x57));
}
diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index f91f724..1711fdf 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -206,7 +206,7 @@
acpi_gbl_next_owner_id_offset = 0;
acpi_gbl_debugger_configuration = DEBUGGER_THREADING;
acpi_gbl_osi_mutex = NULL;
- acpi_gbl_max_loop_iterations = 0xFFFF;
+ acpi_gbl_max_loop_iterations = ACPI_MAX_LOOP_COUNT;
/* Hardware oriented */
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index 3465fe2..2514239 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -48,8 +48,8 @@
ACPI_MODULE_NAME("utnonansi")
/*
- * Non-ANSI C library functions - strlwr, strupr, stricmp, and a 64-bit
- * version of strtoul.
+ * Non-ANSI C library functions - strlwr, strupr, stricmp, and "safe"
+ * string functions.
*/
/*******************************************************************************
*
@@ -200,356 +200,3 @@
return (FALSE);
}
#endif
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_ut_strtoul64
- *
- * PARAMETERS: string - Null terminated string
- * base - Radix of the string: 16 or 10 or
- * ACPI_ANY_BASE
- * max_integer_byte_width - Maximum allowable integer,in bytes:
- * 4 or 8 (32 or 64 bits)
- * ret_integer - Where the converted integer is
- * returned
- *
- * RETURN: Status and Converted value
- *
- * DESCRIPTION: Convert a string into an unsigned value. Performs either a
- * 32-bit or 64-bit conversion, depending on the input integer
- * size (often the current mode of the interpreter).
- *
- * NOTES: Negative numbers are not supported, as they are not supported
- * by ACPI.
- *
- * acpi_gbl_integer_byte_width should be set to the proper width.
- * For the core ACPICA code, this width depends on the DSDT
- * version. For iASL, the default byte width is always 8 for the
- * parser, but error checking is performed later to flag cases
- * where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
- *
- * Does not support Octal strings, not needed at this time.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ut_strtoul64(char *string,
- u32 base, u32 max_integer_byte_width, u64 *ret_integer)
-{
- u32 this_digit = 0;
- u64 return_value = 0;
- u64 quotient;
- u64 dividend;
- u8 valid_digits = 0;
- u8 sign_of0x = 0;
- u8 term = 0;
-
- ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string);
-
- switch (base) {
- case ACPI_ANY_BASE:
- case 10:
- case 16:
-
- break;
-
- default:
-
- /* Invalid Base */
-
- return_ACPI_STATUS(AE_BAD_PARAMETER);
- }
-
- if (!string) {
- goto error_exit;
- }
-
- /* Skip over any white space in the buffer */
-
- while ((*string) && (isspace((int)*string) || *string == '\t')) {
- string++;
- }
-
- if (base == ACPI_ANY_BASE) {
- /*
- * Base equal to ACPI_ANY_BASE means 'Either decimal or hex'.
- * We need to determine if it is decimal or hexadecimal.
- */
- if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) {
- sign_of0x = 1;
- base = 16;
-
- /* Skip over the leading '0x' */
- string += 2;
- } else {
- base = 10;
- }
- }
-
- /* Any string left? Check that '0x' is not followed by white space. */
-
- if (!(*string) || isspace((int)*string) || *string == '\t') {
- if (base == ACPI_ANY_BASE) {
- goto error_exit;
- } else {
- goto all_done;
- }
- }
-
- /*
- * Perform a 32-bit or 64-bit conversion, depending upon the input
- * byte width
- */
- dividend = (max_integer_byte_width <= ACPI_MAX32_BYTE_WIDTH) ?
- ACPI_UINT32_MAX : ACPI_UINT64_MAX;
-
- /* Main loop: convert the string to a 32- or 64-bit integer */
-
- while (*string) {
- if (isdigit((int)*string)) {
-
- /* Convert ASCII 0-9 to Decimal value */
-
- this_digit = ((u8)*string) - '0';
- } else if (base == 10) {
-
- /* Digit is out of range; possible in to_integer case only */
-
- term = 1;
- } else {
- this_digit = (u8)toupper((int)*string);
- if (isxdigit((int)this_digit)) {
-
- /* Convert ASCII Hex char to value */
-
- this_digit = this_digit - 'A' + 10;
- } else {
- term = 1;
- }
- }
-
- if (term) {
- if (base == ACPI_ANY_BASE) {
- goto error_exit;
- } else {
- break;
- }
- } else if ((valid_digits == 0) && (this_digit == 0)
- && !sign_of0x) {
-
- /* Skip zeros */
- string++;
- continue;
- }
-
- valid_digits++;
-
- if (sign_of0x && ((valid_digits > 16) ||
- ((valid_digits > 8)
- && (max_integer_byte_width <=
- ACPI_MAX32_BYTE_WIDTH)))) {
- /*
- * This is to_integer operation case.
- * No restrictions for string-to-integer conversion,
- * see ACPI spec.
- */
- goto error_exit;
- }
-
- /* Divide the digit into the correct position */
-
- (void)acpi_ut_short_divide((dividend - (u64)this_digit), base,
- "ient, NULL);
-
- if (return_value > quotient) {
- if (base == ACPI_ANY_BASE) {
- goto error_exit;
- } else {
- break;
- }
- }
-
- return_value *= base;
- return_value += this_digit;
- string++;
- }
-
- /* All done, normal exit */
-
-all_done:
-
- ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Converted value: %8.8X%8.8X\n",
- ACPI_FORMAT_UINT64(return_value)));
-
- *ret_integer = return_value;
- return_ACPI_STATUS(AE_OK);
-
-error_exit:
-
- /* Base was set/validated above (10 or 16) */
-
- if (base == 10) {
- return_ACPI_STATUS(AE_BAD_DECIMAL_CONSTANT);
- } else {
- return_ACPI_STATUS(AE_BAD_HEX_CONSTANT);
- }
-}
-
-#ifdef _OBSOLETE_FUNCTIONS
-/* Removed: 01/2016 */
-
-/*******************************************************************************
- *
- * FUNCTION: strtoul64
- *
- * PARAMETERS: string - Null terminated string
- * terminater - Where a pointer to the terminating byte
- * is returned
- * base - Radix of the string
- *
- * RETURN: Converted value
- *
- * DESCRIPTION: Convert a string into an unsigned value.
- *
- ******************************************************************************/
-
-acpi_status strtoul64(char *string, u32 base, u64 *ret_integer)
-{
- u32 index;
- u32 sign;
- u64 return_value = 0;
- acpi_status status = AE_OK;
-
- *ret_integer = 0;
-
- switch (base) {
- case 0:
- case 8:
- case 10:
- case 16:
-
- break;
-
- default:
- /*
- * The specified Base parameter is not in the domain of
- * this function:
- */
- return (AE_BAD_PARAMETER);
- }
-
- /* Skip over any white space in the buffer: */
-
- while (isspace((int)*string) || *string == '\t') {
- ++string;
- }
-
- /*
- * The buffer may contain an optional plus or minus sign.
- * If it does, then skip over it but remember what is was:
- */
- if (*string == '-') {
- sign = ACPI_SIGN_NEGATIVE;
- ++string;
- } else if (*string == '+') {
- ++string;
- sign = ACPI_SIGN_POSITIVE;
- } else {
- sign = ACPI_SIGN_POSITIVE;
- }
-
- /*
- * If the input parameter Base is zero, then we need to
- * determine if it is octal, decimal, or hexadecimal:
- */
- if (base == 0) {
- if (*string == '0') {
- if (tolower((int)*(++string)) == 'x') {
- base = 16;
- ++string;
- } else {
- base = 8;
- }
- } else {
- base = 10;
- }
- }
-
- /*
- * For octal and hexadecimal bases, skip over the leading
- * 0 or 0x, if they are present.
- */
- if (base == 8 && *string == '0') {
- string++;
- }
-
- if (base == 16 && *string == '0' && tolower((int)*(++string)) == 'x') {
- string++;
- }
-
- /* Main loop: convert the string to an unsigned long */
-
- while (*string) {
- if (isdigit((int)*string)) {
- index = ((u8)*string) - '0';
- } else {
- index = (u8)toupper((int)*string);
- if (isupper((int)index)) {
- index = index - 'A' + 10;
- } else {
- goto error_exit;
- }
- }
-
- if (index >= base) {
- goto error_exit;
- }
-
- /* Check to see if value is out of range: */
-
- if (return_value > ((ACPI_UINT64_MAX - (u64)index) / (u64)base)) {
- goto error_exit;
- } else {
- return_value *= base;
- return_value += index;
- }
-
- ++string;
- }
-
- /* If a minus sign was present, then "the conversion is negated": */
-
- if (sign == ACPI_SIGN_NEGATIVE) {
- return_value = (ACPI_UINT32_MAX - return_value) + 1;
- }
-
- *ret_integer = return_value;
- return (status);
-
-error_exit:
- switch (base) {
- case 8:
-
- status = AE_BAD_OCTAL_CONSTANT;
- break;
-
- case 10:
-
- status = AE_BAD_DECIMAL_CONSTANT;
- break;
-
- case 16:
-
- status = AE_BAD_HEX_CONSTANT;
- break;
-
- default:
-
- /* Base validated above */
-
- break;
- }
-
- return (status);
-}
-#endif
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index 3f5fed6..f0484b0 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -390,11 +390,22 @@
* PARAMETERS: walk_state - Current walk state
*
* RETURN: Status
+ * Integer: TRUE (0) if input string is matched
+ * FALSE (-1) if string is not matched
*
* DESCRIPTION: Implementation of the _OSI predefined control method. When
* an invocation of _OSI is encountered in the system AML,
* control is transferred to this function.
*
+ * (August 2016)
+ * Note: _OSI is now defined to return "Ones" to indicate a match, for
+ * compatibility with other ACPI implementations. On a 32-bit DSDT, Ones
+ * is 0xFFFFFFFF. On a 64-bit DSDT, Ones is 0xFFFFFFFFFFFFFFFF
+ * (ACPI_UINT64_MAX).
+ *
+ * This function always returns ACPI_UINT64_MAX for TRUE, and later code
+ * will truncate this to 32 bits if necessary.
+ *
******************************************************************************/
acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
@@ -404,7 +415,7 @@
struct acpi_interface_info *interface_info;
acpi_interface_handler interface_handler;
acpi_status status;
- u32 return_value;
+ u64 return_value;
ACPI_FUNCTION_TRACE(ut_osi_implementation);
@@ -444,7 +455,7 @@
acpi_gbl_osi_data = interface_info->value;
}
- return_value = ACPI_UINT32_MAX;
+ return_value = ACPI_UINT64_MAX;
}
acpi_os_release_mutex(acpi_gbl_osi_mutex);
@@ -456,9 +467,10 @@
*/
interface_handler = acpi_gbl_interface_handler;
if (interface_handler) {
- return_value =
- interface_handler(string_desc->string.pointer,
- return_value);
+ if (interface_handler
+ (string_desc->string.pointer, (u32)return_value)) {
+ return_value = ACPI_UINT64_MAX;
+ }
}
ACPI_DEBUG_PRINT_RAW((ACPI_DB_INFO,
diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c
index 770a177..ce18346 100644
--- a/drivers/acpi/acpica/utpredef.c
+++ b/drivers/acpi/acpica/utpredef.c
@@ -176,8 +176,6 @@
******************************************************************************/
#if (defined ACPI_ASL_COMPILER || defined ACPI_HELP_APP)
-#include <stdio.h>
-#include <string.h>
/* Local prototypes */
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index dd084cf..40eba80 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -336,7 +336,7 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ut_vsnprintf
+ * FUNCTION: vsnprintf
*
* PARAMETERS: string - String with boundary
* size - Boundary of the string
@@ -349,9 +349,7 @@
*
******************************************************************************/
-int
-acpi_ut_vsnprintf(char *string,
- acpi_size size, const char *format, va_list args)
+int vsnprintf(char *string, acpi_size size, const char *format, va_list args)
{
u8 base;
u8 type;
@@ -586,7 +584,7 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ut_snprintf
+ * FUNCTION: snprintf
*
* PARAMETERS: string - String with boundary
* size - Boundary of the string
@@ -598,13 +596,38 @@
*
******************************************************************************/
-int acpi_ut_snprintf(char *string, acpi_size size, const char *format, ...)
+int snprintf(char *string, acpi_size size, const char *format, ...)
{
va_list args;
int length;
va_start(args, format);
- length = acpi_ut_vsnprintf(string, size, format, args);
+ length = vsnprintf(string, size, format, args);
+ va_end(args);
+
+ return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: sprintf
+ *
+ * PARAMETERS: string - String with boundary
+ * Format, ... - Standard printf format
+ *
+ * RETURN: Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to a string.
+ *
+ ******************************************************************************/
+
+int sprintf(char *string, const char *format, ...)
+{
+ va_list args;
+ int length;
+
+ va_start(args, format);
+ length = vsnprintf(string, ACPI_UINT32_MAX, format, args);
va_end(args);
return (length);
@@ -613,7 +636,59 @@
#ifdef ACPI_APPLICATION
/*******************************************************************************
*
- * FUNCTION: acpi_ut_file_vprintf
+ * FUNCTION: vprintf
+ *
+ * PARAMETERS: format - Standard printf format
+ * args - Argument list
+ *
+ * RETURN: Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to stdout using argument list pointer.
+ *
+ ******************************************************************************/
+
+int vprintf(const char *format, va_list args)
+{
+ acpi_cpu_flags flags;
+ int length;
+
+ flags = acpi_os_acquire_lock(acpi_gbl_print_lock);
+ length = vsnprintf(acpi_gbl_print_buffer,
+ sizeof(acpi_gbl_print_buffer), format, args);
+
+ (void)fwrite(acpi_gbl_print_buffer, length, 1, ACPI_FILE_OUT);
+ acpi_os_release_lock(acpi_gbl_print_lock, flags);
+
+ return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: printf
+ *
+ * PARAMETERS: Format, ... - Standard printf format
+ *
+ * RETURN: Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to stdout.
+ *
+ ******************************************************************************/
+
+int printf(const char *format, ...)
+{
+ va_list args;
+ int length;
+
+ va_start(args, format);
+ length = vprintf(format, args);
+ va_end(args);
+
+ return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: vfprintf
*
* PARAMETERS: file - File descriptor
* format - Standard printf format
@@ -625,16 +700,16 @@
*
******************************************************************************/
-int acpi_ut_file_vprintf(ACPI_FILE file, const char *format, va_list args)
+int vfprintf(FILE * file, const char *format, va_list args)
{
acpi_cpu_flags flags;
int length;
flags = acpi_os_acquire_lock(acpi_gbl_print_lock);
- length = acpi_ut_vsnprintf(acpi_gbl_print_buffer,
- sizeof(acpi_gbl_print_buffer), format, args);
+ length = vsnprintf(acpi_gbl_print_buffer,
+ sizeof(acpi_gbl_print_buffer), format, args);
- (void)acpi_os_write_file(file, acpi_gbl_print_buffer, length, 1);
+ (void)fwrite(acpi_gbl_print_buffer, length, 1, file);
acpi_os_release_lock(acpi_gbl_print_lock, flags);
return (length);
@@ -642,7 +717,7 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ut_file_printf
+ * FUNCTION: fprintf
*
* PARAMETERS: file - File descriptor
* Format, ... - Standard printf format
@@ -653,13 +728,13 @@
*
******************************************************************************/
-int acpi_ut_file_printf(ACPI_FILE file, const char *format, ...)
+int fprintf(FILE * file, const char *format, ...)
{
va_list args;
int length;
va_start(args, format);
- length = acpi_ut_file_vprintf(file, format, args);
+ length = vfprintf(file, format, args);
va_end(args);
return (length);
diff --git a/drivers/acpi/acpica/utstrtoul64.c b/drivers/acpi/acpica/utstrtoul64.c
new file mode 100644
index 0000000..b4f341c
--- /dev/null
+++ b/drivers/acpi/acpica/utstrtoul64.c
@@ -0,0 +1,348 @@
+/*******************************************************************************
+ *
+ * Module Name: utstrtoul64 - string to 64-bit integer support
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+
+/*******************************************************************************
+ *
+ * The functions in this module satisfy the need for 64-bit string-to-integer
+ * conversions on both 32-bit and 64-bit platforms.
+ *
+ ******************************************************************************/
+
+#define _COMPONENT ACPI_UTILITIES
+ACPI_MODULE_NAME("utstrtoul64")
+
+/* Local prototypes */
+static u64 acpi_ut_strtoul_base10(char *string, u32 flags);
+
+static u64 acpi_ut_strtoul_base16(char *string, u32 flags);
+
+/*******************************************************************************
+ *
+ * String conversion rules as written in the ACPI specification. The error
+ * conditions and behavior are different depending on the type of conversion.
+ *
+ *
+ * Implicit data type conversion: string-to-integer
+ * --------------------------------------------------
+ *
+ * Base is always 16. This is the ACPI_STRTOUL_BASE16 case.
+ *
+ * Example:
+ * Add ("BA98", Arg0, Local0)
+ *
+ * The integer is initialized to the value zero.
+ * The ASCII string is interpreted as a hexadecimal constant.
+ *
+ * 1) A "0x" prefix is not allowed. However, ACPICA allows this for
+ * compatibility with previous ACPICA. (NO ERROR)
+ *
+ * 2) Terminates when the size of an integer is reached (32 or 64 bits).
+ * (NO ERROR)
+ *
+ * 3) The first non-hex character terminates the conversion without error.
+ * (NO ERROR)
+ *
+ * 4) Conversion of a null (zero-length) string to an integer is not
+ * allowed. However, ACPICA allows this for compatibility with previous
+ * ACPICA. This conversion returns the value 0. (NO ERROR)
+ *
+ *
+ * Explicit data type conversion: to_integer() with string operand
+ * ---------------------------------------------------------------
+ *
+ * Base is either 10 (default) or 16 (with 0x prefix)
+ *
+ * Examples:
+ * to_integer ("1000")
+ * to_integer ("0xABCD")
+ *
+ * 1) Can be (must be) either a decimal or hexadecimal numeric string.
+ * A hex value must be prefixed by "0x" or it is interpreted as a decimal.
+ *
+ * 2) The value must not exceed the maximum of an integer value. ACPI spec
+ * states the behavior is "unpredictable", so ACPICA matches the behavior
+ * of the implicit conversion case.(NO ERROR)
+ *
+ * 3) Behavior on the first non-hex character is not specified by the ACPI
+ * spec, so ACPICA matches the behavior of the implicit conversion case
+ * and terminates. (NO ERROR)
+ *
+ * 4) A null (zero-length) string is illegal.
+ * However, ACPICA allows this for compatibility with previous ACPICA.
+ * This conversion returns the value 0. (NO ERROR)
+ *
+ ******************************************************************************/
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_strtoul64
+ *
+ * PARAMETERS: string - Null terminated input string
+ * flags - Conversion info, see below
+ * return_value - Where the converted integer is
+ * returned
+ *
+ * RETURN: Status and Converted value
+ *
+ * DESCRIPTION: Convert a string into an unsigned value. Performs either a
+ * 32-bit or 64-bit conversion, depending on the input integer
+ * size in Flags (often the current mode of the interpreter).
+ *
+ * Values for Flags:
+ * ACPI_STRTOUL_32BIT - Max integer value is 32 bits
+ * ACPI_STRTOUL_64BIT - Max integer value is 64 bits
+ * ACPI_STRTOUL_BASE16 - Input string is hexadecimal. Default
+ * is 10/16 based on string prefix (0x).
+ *
+ * NOTES:
+ * Negative numbers are not supported, as they are not supported by ACPI.
+ *
+ * Supports only base 16 or base 10 strings/values. Does not
+ * support Octal strings, as these are not supported by ACPI.
+ *
+ * Current users of this support:
+ *
+ * interpreter - Implicit and explicit conversions, GPE method names
+ * debugger - Command line input string conversion
+ * iASL - Main parser, conversion of constants to integers
+ * iASL - Data Table Compiler parser (constant math expressions)
+ * iASL - Preprocessor (constant math expressions)
+ * acpi_dump - Input table addresses
+ * acpi_exec - Testing of the acpi_ut_strtoul64 function
+ *
+ * Note concerning callers:
+ * acpi_gbl_integer_byte_width can be used to set the 32/64 limit. If used,
+ * this global should be set to the proper width. For the core ACPICA code,
+ * this width depends on the DSDT version. For iASL, the default byte
+ * width is always 8 for the parser, but error checking is performed later
+ * to flag cases where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ut_strtoul64(char *string, u32 flags, u64 *return_value)
+{
+ acpi_status status = AE_OK;
+ u32 base;
+
+ ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string);
+
+ /* Parameter validation */
+
+ if (!string || !return_value) {
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ *return_value = 0;
+
+ /* Check for zero-length string, returns 0 */
+
+ if (*string == 0) {
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ /* Skip over any white space at start of string */
+
+ while (isspace((int)*string)) {
+ string++;
+ }
+
+ /* End of string? return 0 */
+
+ if (*string == 0) {
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ /*
+ * 1) The "0x" prefix indicates base 16. Per the ACPI specification,
+ * the "0x" prefix is only allowed for implicit (non-strict) conversions.
+ * However, we always allow it for compatibility with older ACPICA.
+ */
+ if ((*string == ACPI_ASCII_ZERO) &&
+ (tolower((int)*(string + 1)) == 'x')) {
+ string += 2; /* Go past the 0x */
+ if (*string == 0) {
+ return_ACPI_STATUS(AE_OK); /* Return value 0 */
+ }
+
+ base = 16;
+ }
+
+ /* 2) Force to base 16 (implicit conversion case) */
+
+ else if (flags & ACPI_STRTOUL_BASE16) {
+ base = 16;
+ }
+
+ /* 3) Default fallback is to Base 10 */
+
+ else {
+ base = 10;
+ }
+
+ /* Skip all leading zeros */
+
+ while (*string == ACPI_ASCII_ZERO) {
+ string++;
+ if (*string == 0) {
+ return_ACPI_STATUS(AE_OK); /* Return value 0 */
+ }
+ }
+
+ /* Perform the base 16 or 10 conversion */
+
+ if (base == 16) {
+ *return_value = acpi_ut_strtoul_base16(string, flags);
+ } else {
+ *return_value = acpi_ut_strtoul_base10(string, flags);
+ }
+
+ return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_strtoul_base10
+ *
+ * PARAMETERS: string - Null terminated input string
+ * flags - Conversion info
+ *
+ * RETURN: 64-bit converted integer
+ *
+ * DESCRIPTION: Performs a base 10 conversion of the input string to an
+ * integer value, either 32 or 64 bits.
+ * Note: String must be valid and non-null.
+ *
+ ******************************************************************************/
+
+static u64 acpi_ut_strtoul_base10(char *string, u32 flags)
+{
+ int ascii_digit;
+ u64 next_value;
+ u64 return_value = 0;
+
+ /* Main loop: convert each ASCII byte in the input string */
+
+ while (*string) {
+ ascii_digit = *string;
+ if (!isdigit(ascii_digit)) {
+
+ /* Not ASCII 0-9, terminate */
+
+ goto exit;
+ }
+
+ /* Convert and insert (add) the decimal digit */
+
+ next_value =
+ (return_value * 10) + (ascii_digit - ACPI_ASCII_ZERO);
+
+ /* Check for overflow (32 or 64 bit) - return current converted value */
+
+ if (((flags & ACPI_STRTOUL_32BIT) && (next_value > ACPI_UINT32_MAX)) || (next_value < return_value)) { /* 64-bit overflow case */
+ goto exit;
+ }
+
+ return_value = next_value;
+ string++;
+ }
+
+exit:
+ return (return_value);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_strtoul_base16
+ *
+ * PARAMETERS: string - Null terminated input string
+ * flags - conversion info
+ *
+ * RETURN: 64-bit converted integer
+ *
+ * DESCRIPTION: Performs a base 16 conversion of the input string to an
+ * integer value, either 32 or 64 bits.
+ * Note: String must be valid and non-null.
+ *
+ ******************************************************************************/
+
+static u64 acpi_ut_strtoul_base16(char *string, u32 flags)
+{
+ int ascii_digit;
+ u32 valid_digits = 1;
+ u64 return_value = 0;
+
+ /* Main loop: convert each ASCII byte in the input string */
+
+ while (*string) {
+
+ /* Check for overflow (32 or 64 bit) - return current converted value */
+
+ if ((valid_digits > 16) ||
+ ((valid_digits > 8) && (flags & ACPI_STRTOUL_32BIT))) {
+ goto exit;
+ }
+
+ ascii_digit = *string;
+ if (!isxdigit(ascii_digit)) {
+
+ /* Not Hex ASCII A-F, a-f, or 0-9, terminate */
+
+ goto exit;
+ }
+
+ /* Convert and insert the hex digit */
+
+ return_value =
+ (return_value << 4) |
+ acpi_ut_ascii_char_to_hex(ascii_digit);
+
+ string++;
+ valid_digits++;
+ }
+
+exit:
+ return (return_value);
+}
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index 0df07df..df31d71 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -95,13 +95,11 @@
{
struct acpi_memory_list *cache;
- cache = acpi_os_allocate(sizeof(struct acpi_memory_list));
+ cache = acpi_os_allocate_zeroed(sizeof(struct acpi_memory_list));
if (!cache) {
return (AE_NO_MEMORY);
}
- memset(cache, 0, sizeof(struct acpi_memory_list));
-
cache->list_name = list_name;
cache->object_size = object_size;
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index d9e6aac..ec503c8 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -61,7 +61,7 @@
* DESCRIPTION: Shutdown the ACPICA subsystem and release all resources.
*
******************************************************************************/
-acpi_status __init acpi_terminate(void)
+acpi_status ACPI_INIT_FUNCTION acpi_terminate(void)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index 75b5f27..a5ca0f5 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -69,7 +69,7 @@
*
******************************************************************************/
-acpi_status __init acpi_initialize_subsystem(void)
+acpi_status ACPI_INIT_FUNCTION acpi_initialize_subsystem(void)
{
acpi_status status;
@@ -141,7 +141,7 @@
* Puts system into ACPI mode if it isn't already.
*
******************************************************************************/
-acpi_status __init acpi_enable_subsystem(u32 flags)
+acpi_status ACPI_INIT_FUNCTION acpi_enable_subsystem(u32 flags)
{
acpi_status status = AE_OK;
@@ -239,7 +239,7 @@
* objects and executing AML code for Regions, buffers, etc.
*
******************************************************************************/
-acpi_status __init acpi_initialize_objects(u32 flags)
+acpi_status ACPI_INIT_FUNCTION acpi_initialize_objects(u32 flags)
{
acpi_status status = AE_OK;
@@ -265,7 +265,8 @@
* all of the tables have been loaded. It is a legacy option and is
* not compatible with other ACPI implementations. See acpi_ns_load_table.
*/
- if (acpi_gbl_group_module_level_code) {
+ if (!acpi_gbl_parse_table_as_term_list
+ && acpi_gbl_group_module_level_code) {
acpi_ns_exec_module_code_list();
/*
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
new file mode 100644
index 0000000..4616da4
--- /dev/null
+++ b/drivers/acpi/arm64/Kconfig
@@ -0,0 +1,6 @@
+#
+# ACPI Configuration for ARM64
+#
+
+config ACPI_IORT
+ bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
new file mode 100644
index 0000000..72331f2
--- /dev/null
+++ b/drivers/acpi/arm64/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ACPI_IORT) += iort.o
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
new file mode 100644
index 0000000..6b81746
--- /dev/null
+++ b/drivers/acpi/arm64/iort.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) 2016, Semihalf
+ * Author: Tomasz Nowicki <tn@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * This file implements early detection/parsing of I/O mapping
+ * reported to OS through firmware via I/O Remapping Table (IORT)
+ * IORT document number: ARM DEN 0049A
+ */
+
+#define pr_fmt(fmt) "ACPI: IORT: " fmt
+
+#include <linux/acpi_iort.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+struct iort_its_msi_chip {
+ struct list_head list;
+ struct fwnode_handle *fw_node;
+ u32 translation_id;
+};
+
+typedef acpi_status (*iort_find_node_callback)
+ (struct acpi_iort_node *node, void *context);
+
+/* Root pointer to the mapped IORT table */
+static struct acpi_table_header *iort_table;
+
+static LIST_HEAD(iort_msi_chip_list);
+static DEFINE_SPINLOCK(iort_msi_chip_lock);
+
+/**
+ * iort_register_domain_token() - register domain token and related ITS ID
+ * to the list from where we can get it back later on.
+ * @trans_id: ITS ID.
+ * @fw_node: Domain token.
+ *
+ * Returns: 0 on success, -ENOMEM if no memory when allocating list element
+ */
+int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+{
+ struct iort_its_msi_chip *its_msi_chip;
+
+ its_msi_chip = kzalloc(sizeof(*its_msi_chip), GFP_KERNEL);
+ if (!its_msi_chip)
+ return -ENOMEM;
+
+ its_msi_chip->fw_node = fw_node;
+ its_msi_chip->translation_id = trans_id;
+
+ spin_lock(&iort_msi_chip_lock);
+ list_add(&its_msi_chip->list, &iort_msi_chip_list);
+ spin_unlock(&iort_msi_chip_lock);
+
+ return 0;
+}
+
+/**
+ * iort_deregister_domain_token() - Deregister domain token based on ITS ID
+ * @trans_id: ITS ID.
+ *
+ * Returns: none.
+ */
+void iort_deregister_domain_token(int trans_id)
+{
+ struct iort_its_msi_chip *its_msi_chip, *t;
+
+ spin_lock(&iort_msi_chip_lock);
+ list_for_each_entry_safe(its_msi_chip, t, &iort_msi_chip_list, list) {
+ if (its_msi_chip->translation_id == trans_id) {
+ list_del(&its_msi_chip->list);
+ kfree(its_msi_chip);
+ break;
+ }
+ }
+ spin_unlock(&iort_msi_chip_lock);
+}
+
+/**
+ * iort_find_domain_token() - Find domain token based on given ITS ID
+ * @trans_id: ITS ID.
+ *
+ * Returns: domain token when find on the list, NULL otherwise
+ */
+struct fwnode_handle *iort_find_domain_token(int trans_id)
+{
+ struct fwnode_handle *fw_node = NULL;
+ struct iort_its_msi_chip *its_msi_chip;
+
+ spin_lock(&iort_msi_chip_lock);
+ list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+ if (its_msi_chip->translation_id == trans_id) {
+ fw_node = its_msi_chip->fw_node;
+ break;
+ }
+ }
+ spin_unlock(&iort_msi_chip_lock);
+
+ return fw_node;
+}
+
+static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
+ iort_find_node_callback callback,
+ void *context)
+{
+ struct acpi_iort_node *iort_node, *iort_end;
+ struct acpi_table_iort *iort;
+ int i;
+
+ if (!iort_table)
+ return NULL;
+
+ /* Get the first IORT node */
+ iort = (struct acpi_table_iort *)iort_table;
+ iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort,
+ iort->node_offset);
+ iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ iort_table->length);
+
+ for (i = 0; i < iort->node_count; i++) {
+ if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND,
+ "IORT node pointer overflows, bad table!\n"))
+ return NULL;
+
+ if (iort_node->type == type &&
+ ACPI_SUCCESS(callback(iort_node, context)))
+ return iort_node;
+
+ iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
+ iort_node->length);
+ }
+
+ return NULL;
+}
+
+static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
+ void *context)
+{
+ struct device *dev = context;
+ acpi_status status;
+
+ if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) {
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
+ struct acpi_iort_named_component *ncomp;
+
+ if (!adev) {
+ status = AE_NOT_FOUND;
+ goto out;
+ }
+
+ status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf);
+ if (ACPI_FAILURE(status)) {
+ dev_warn(dev, "Can't get device full path name\n");
+ goto out;
+ }
+
+ ncomp = (struct acpi_iort_named_component *)node->node_data;
+ status = !strcmp(ncomp->device_name, buf.pointer) ?
+ AE_OK : AE_NOT_FOUND;
+ acpi_os_free(buf.pointer);
+ } else if (node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+ struct acpi_iort_root_complex *pci_rc;
+ struct pci_bus *bus;
+
+ bus = to_pci_bus(dev);
+ pci_rc = (struct acpi_iort_root_complex *)node->node_data;
+
+ /*
+ * It is assumed that PCI segment numbers maps one-to-one
+ * with root complexes. Each segment number can represent only
+ * one root complex.
+ */
+ status = pci_rc->pci_segment_number == pci_domain_nr(bus) ?
+ AE_OK : AE_NOT_FOUND;
+ } else {
+ status = AE_NOT_FOUND;
+ }
+out:
+ return status;
+}
+
+static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
+ u32 *rid_out)
+{
+ /* Single mapping does not care for input id */
+ if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
+ if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
+ type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+ *rid_out = map->output_base;
+ return 0;
+ }
+
+ pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for node type %d, skipping ID map\n",
+ map, type);
+ return -ENXIO;
+ }
+
+ if (rid_in < map->input_base ||
+ (rid_in >= map->input_base + map->id_count))
+ return -ENXIO;
+
+ *rid_out = map->output_base + (rid_in - map->input_base);
+ return 0;
+}
+
+static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
+ u32 rid_in, u32 *rid_out,
+ u8 type)
+{
+ u32 rid = rid_in;
+
+ /* Parse the ID mapping tree to find specified node type */
+ while (node) {
+ struct acpi_iort_id_mapping *map;
+ int i;
+
+ if (node->type == type) {
+ if (rid_out)
+ *rid_out = rid;
+ return node;
+ }
+
+ if (!node->mapping_offset || !node->mapping_count)
+ goto fail_map;
+
+ map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+ node->mapping_offset);
+
+ /* Firmware bug! */
+ if (!map->output_reference) {
+ pr_err(FW_BUG "[node %p type %d] ID map has NULL parent reference\n",
+ node, node->type);
+ goto fail_map;
+ }
+
+ /* Do the RID translation */
+ for (i = 0; i < node->mapping_count; i++, map++) {
+ if (!iort_id_map(map, node->type, rid, &rid))
+ break;
+ }
+
+ if (i == node->mapping_count)
+ goto fail_map;
+
+ node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ map->output_reference);
+ }
+
+fail_map:
+ /* Map input RID to output RID unchanged on mapping failure*/
+ if (rid_out)
+ *rid_out = rid_in;
+
+ return NULL;
+}
+
+static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
+{
+ struct pci_bus *pbus;
+
+ if (!dev_is_pci(dev))
+ return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+ iort_match_node_callback, dev);
+
+ /* Find a PCI root bus */
+ pbus = to_pci_dev(dev)->bus;
+ while (!pci_is_root_bus(pbus))
+ pbus = pbus->parent;
+
+ return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
+ iort_match_node_callback, &pbus->dev);
+}
+
+/**
+ * iort_msi_map_rid() - Map a MSI requester ID for a device
+ * @dev: The device for which the mapping is to be done.
+ * @req_id: The device requester ID.
+ *
+ * Returns: mapped MSI RID on success, input requester ID otherwise
+ */
+u32 iort_msi_map_rid(struct device *dev, u32 req_id)
+{
+ struct acpi_iort_node *node;
+ u32 dev_id;
+
+ node = iort_find_dev_node(dev);
+ if (!node)
+ return req_id;
+
+ iort_node_map_rid(node, req_id, &dev_id, ACPI_IORT_NODE_ITS_GROUP);
+ return dev_id;
+}
+
+/**
+ * iort_dev_find_its_id() - Find the ITS identifier for a device
+ * @dev: The device.
+ * @idx: Index of the ITS identifier list.
+ * @its_id: ITS identifier.
+ *
+ * Returns: 0 on success, appropriate error value otherwise
+ */
+static int iort_dev_find_its_id(struct device *dev, u32 req_id,
+ unsigned int idx, int *its_id)
+{
+ struct acpi_iort_its_group *its;
+ struct acpi_iort_node *node;
+
+ node = iort_find_dev_node(dev);
+ if (!node)
+ return -ENXIO;
+
+ node = iort_node_map_rid(node, req_id, NULL, ACPI_IORT_NODE_ITS_GROUP);
+ if (!node)
+ return -ENXIO;
+
+ /* Move to ITS specific data */
+ its = (struct acpi_iort_its_group *)node->node_data;
+ if (idx > its->its_count) {
+ dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n",
+ idx, its->its_count);
+ return -ENXIO;
+ }
+
+ *its_id = its->identifiers[idx];
+ return 0;
+}
+
+/**
+ * iort_get_device_domain() - Find MSI domain related to a device
+ * @dev: The device.
+ * @req_id: Requester ID for the device.
+ *
+ * Returns: the MSI domain for this device, NULL otherwise
+ */
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
+{
+ struct fwnode_handle *handle;
+ int its_id;
+
+ if (iort_dev_find_its_id(dev, req_id, 0, &its_id))
+ return NULL;
+
+ handle = iort_find_domain_token(its_id);
+ if (!handle)
+ return NULL;
+
+ return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
+}
+
+void __init acpi_iort_init(void)
+{
+ acpi_status status;
+
+ status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table);
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ const char *msg = acpi_format_exception(status);
+ pr_err("Failed to get table, %s\n", msg);
+ }
+}
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ab23479..93ecae5 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -733,15 +733,17 @@
return result;
acpi_battery_init_alarm(battery);
}
+
+ result = acpi_battery_get_state(battery);
+ if (result)
+ return result;
+ acpi_battery_quirks(battery);
+
if (!battery->bat) {
result = sysfs_add_battery(battery);
if (result)
return result;
}
- result = acpi_battery_get_state(battery);
- if (result)
- return result;
- acpi_battery_quirks(battery);
/*
* Wakeup the system if battery is critical low
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 85b7d07..56190d0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -36,6 +36,7 @@
#ifdef CONFIG_X86
#include <asm/mpspec.h>
#endif
+#include <linux/acpi_iort.h>
#include <linux/pci.h>
#include <acpi/apei.h>
#include <linux/dmi.h>
@@ -985,7 +986,8 @@
goto error0;
}
- if (acpi_gbl_group_module_level_code) {
+ if (!acpi_gbl_parse_table_as_term_list &&
+ acpi_gbl_group_module_level_code) {
status = acpi_load_tables();
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX
@@ -1074,7 +1076,8 @@
status = acpi_ec_ecdt_probe();
/* Ignore result. Not having an ECDT is not fatal. */
- if (!acpi_gbl_group_module_level_code) {
+ if (acpi_gbl_parse_table_as_term_list ||
+ !acpi_gbl_group_module_level_code) {
status = acpi_load_tables();
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX
@@ -1186,6 +1189,7 @@
}
pci_mmcfg_late_init();
+ acpi_iort_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
@@ -1193,6 +1197,7 @@
acpi_wakeup_device_init();
acpi_debugger_init();
acpi_setup_sb_notify_handler();
+ acpi_set_processor_mapping();
return 0;
}
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 31abb0b..e19f530 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -19,6 +19,8 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+#define pr_fmt(fmt) "ACPI : button: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -104,6 +106,8 @@
struct input_dev *input;
char phys[32]; /* for input device */
unsigned long pushed;
+ int last_state;
+ ktime_t last_time;
bool suspended;
};
@@ -111,6 +115,10 @@
static struct acpi_device *lid_device;
static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+static unsigned long lid_report_interval __read_mostly = 500;
+module_param(lid_report_interval, ulong, 0644);
+MODULE_PARM_DESC(lid_report_interval, "Interval (ms) between lid key events");
+
/* --------------------------------------------------------------------------
FS Interface (/proc)
-------------------------------------------------------------------------- */
@@ -134,10 +142,79 @@
{
struct acpi_button *button = acpi_driver_data(device);
int ret;
+ ktime_t next_report;
+ bool do_update;
- /* input layer checks if event is redundant */
- input_report_switch(button->input, SW_LID, !state);
- input_sync(button->input);
+ /*
+ * In lid_init_state=ignore mode, if user opens/closes lid
+ * frequently with "open" missing, and "last_time" is also updated
+ * frequently, "close" cannot be delivered to the userspace.
+ * So "last_time" is only updated after a timeout or an actual
+ * switch.
+ */
+ if (lid_init_state != ACPI_BUTTON_LID_INIT_IGNORE ||
+ button->last_state != !!state)
+ do_update = true;
+ else
+ do_update = false;
+
+ next_report = ktime_add(button->last_time,
+ ms_to_ktime(lid_report_interval));
+ if (button->last_state == !!state &&
+ ktime_after(ktime_get(), next_report)) {
+ /* Complain the buggy firmware */
+ pr_warn_once("The lid device is not compliant to SW_LID.\n");
+
+ /*
+ * Send the unreliable complement switch event:
+ *
+ * On most platforms, the lid device is reliable. However
+ * there are exceptions:
+ * 1. Platforms returning initial lid state as "close" by
+ * default after booting/resuming:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=89211
+ * https://bugzilla.kernel.org/show_bug.cgi?id=106151
+ * 2. Platforms never reporting "open" events:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=106941
+ * On these buggy platforms, the usage model of the ACPI
+ * lid device actually is:
+ * 1. The initial returning value of _LID may not be
+ * reliable.
+ * 2. The open event may not be reliable.
+ * 3. The close event is reliable.
+ *
+ * But SW_LID is typed as input switch event, the input
+ * layer checks if the event is redundant. Hence if the
+ * state is not switched, the userspace cannot see this
+ * platform triggered reliable event. By inserting a
+ * complement switch event, it then is guaranteed that the
+ * platform triggered reliable one can always be seen by
+ * the userspace.
+ */
+ if (lid_init_state == ACPI_BUTTON_LID_INIT_IGNORE) {
+ do_update = true;
+ /*
+ * Do generate complement switch event for "close"
+ * as "close" is reliable and wrong "open" won't
+ * trigger unexpected behaviors.
+ * Do not generate complement switch event for
+ * "open" as "open" is not reliable and wrong
+ * "close" will trigger unexpected behaviors.
+ */
+ if (!state) {
+ input_report_switch(button->input,
+ SW_LID, state);
+ input_sync(button->input);
+ }
+ }
+ }
+ /* Send the platform triggered reliable event */
+ if (do_update) {
+ input_report_switch(button->input, SW_LID, !state);
+ input_sync(button->input);
+ button->last_state = !!state;
+ button->last_time = ktime_get();
+ }
if (state)
pm_wakeup_event(&device->dev, 0);
@@ -411,6 +488,8 @@
strcpy(name, ACPI_BUTTON_DEVICE_NAME_LID);
sprintf(class, "%s/%s",
ACPI_BUTTON_CLASS, ACPI_BUTTON_SUBCLASS_LID);
+ button->last_state = !!acpi_lid_evaluate_state(device);
+ button->last_time = ktime_get();
} else {
printk(KERN_ERR PREFIX "Unsupported hid [%s]\n", hid);
error = -ENODEV;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 2e98173..d0d0504 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -40,15 +40,48 @@
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/ktime.h>
+#include <linux/rwsem.h>
+#include <linux/wait.h>
#include <acpi/cppc_acpi.h>
-/*
- * Lock to provide mutually exclusive access to the PCC
- * channel. e.g. When the remote updates the shared region
- * with new data, the reader needs to be protected from
- * other CPUs activity on the same channel.
- */
-static DEFINE_SPINLOCK(pcc_lock);
+
+struct cppc_pcc_data {
+ struct mbox_chan *pcc_channel;
+ void __iomem *pcc_comm_addr;
+ int pcc_subspace_idx;
+ bool pcc_channel_acquired;
+ ktime_t deadline;
+ unsigned int pcc_mpar, pcc_mrtt, pcc_nominal;
+
+ bool pending_pcc_write_cmd; /* Any pending/batched PCC write cmds? */
+ bool platform_owns_pcc; /* Ownership of PCC subspace */
+ unsigned int pcc_write_cnt; /* Running count of PCC write commands */
+
+ /*
+ * Lock to provide controlled access to the PCC channel.
+ *
+ * For performance critical usecases(currently cppc_set_perf)
+ * We need to take read_lock and check if channel belongs to OSPM
+ * before reading or writing to PCC subspace
+ * We need to take write_lock before transferring the channel
+ * ownership to the platform via a Doorbell
+ * This allows us to batch a number of CPPC requests if they happen
+ * to originate in about the same time
+ *
+ * For non-performance critical usecases(init)
+ * Take write_lock for all purposes which gives exclusive access
+ */
+ struct rw_semaphore pcc_lock;
+
+ /* Wait queue for CPUs whose requests were batched */
+ wait_queue_head_t pcc_write_wait_q;
+};
+
+/* Structure to represent the single PCC channel */
+static struct cppc_pcc_data pcc_data = {
+ .pcc_subspace_idx = -1,
+ .platform_owns_pcc = true,
+};
/*
* The cpc_desc structure contains the ACPI register details
@@ -59,18 +92,25 @@
*/
static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
-/* This layer handles all the PCC specifics for CPPC. */
-static struct mbox_chan *pcc_channel;
-static void __iomem *pcc_comm_addr;
-static u64 comm_base_addr;
-static int pcc_subspace_idx = -1;
-static bool pcc_channel_acquired;
-static ktime_t deadline;
-static unsigned int pcc_mpar, pcc_mrtt;
-
/* pcc mapped address + header size + offset within PCC subspace */
-#define GET_PCC_VADDR(offs) (pcc_comm_addr + 0x8 + (offs))
+#define GET_PCC_VADDR(offs) (pcc_data.pcc_comm_addr + 0x8 + (offs))
+/* Check if a CPC regsiter is in PCC */
+#define CPC_IN_PCC(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_PLATFORM_COMM)
+
+/* Evalutes to True if reg is a NULL register descriptor */
+#define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \
+ (reg)->address == 0 && \
+ (reg)->bit_width == 0 && \
+ (reg)->bit_offset == 0 && \
+ (reg)->access_width == 0)
+
+/* Evalutes to True if an optional cpc field is supported */
+#define CPC_SUPPORTED(cpc) ((cpc)->type == ACPI_TYPE_INTEGER ? \
+ !!(cpc)->cpc_entry.int_value : \
+ !IS_NULL_REG(&(cpc)->cpc_entry.reg))
/*
* Arbitrary Retries in case the remote processor is slow to respond
* to PCC commands. Keeping it high enough to cover emulators where
@@ -78,11 +118,79 @@
*/
#define NUM_RETRIES 500
-static int check_pcc_chan(void)
+struct cppc_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj,
+ struct attribute *attr, char *buf);
+ ssize_t (*store)(struct kobject *kobj,
+ struct attribute *attr, const char *c, ssize_t count);
+};
+
+#define define_one_cppc_ro(_name) \
+static struct cppc_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define to_cpc_desc(a) container_of(a, struct cpc_desc, kobj)
+
+static ssize_t show_feedback_ctrs(struct kobject *kobj,
+ struct attribute *attr, char *buf)
{
- int ret = -EIO;
- struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_comm_addr;
- ktime_t next_deadline = ktime_add(ktime_get(), deadline);
+ struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+ struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+ cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+ return scnprintf(buf, PAGE_SIZE, "ref:%llu del:%llu\n",
+ fb_ctrs.reference, fb_ctrs.delivered);
+}
+define_one_cppc_ro(feedback_ctrs);
+
+static ssize_t show_reference_perf(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+ struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+ cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n",
+ fb_ctrs.reference_perf);
+}
+define_one_cppc_ro(reference_perf);
+
+static ssize_t show_wraparound_time(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+ struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+ cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", fb_ctrs.ctr_wrap_time);
+
+}
+define_one_cppc_ro(wraparound_time);
+
+static struct attribute *cppc_attrs[] = {
+ &feedback_ctrs.attr,
+ &reference_perf.attr,
+ &wraparound_time.attr,
+ NULL
+};
+
+static struct kobj_type cppc_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_attrs = cppc_attrs,
+};
+
+static int check_pcc_chan(bool chk_err_bit)
+{
+ int ret = -EIO, status = 0;
+ struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_data.pcc_comm_addr;
+ ktime_t next_deadline = ktime_add(ktime_get(), pcc_data.deadline);
+
+ if (!pcc_data.platform_owns_pcc)
+ return 0;
/* Retry in case the remote processor was too slow to catch up. */
while (!ktime_after(ktime_get(), next_deadline)) {
@@ -91,8 +199,11 @@
* platform and should have set the command completion bit when
* PCC can be used by OSPM
*/
- if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) {
+ status = readw_relaxed(&generic_comm_base->status);
+ if (status & PCC_CMD_COMPLETE_MASK) {
ret = 0;
+ if (chk_err_bit && (status & PCC_ERROR_MASK))
+ ret = -EIO;
break;
}
/*
@@ -102,14 +213,23 @@
udelay(3);
}
+ if (likely(!ret))
+ pcc_data.platform_owns_pcc = false;
+ else
+ pr_err("PCC check channel failed. Status=%x\n", status);
+
return ret;
}
+/*
+ * This function transfers the ownership of the PCC to the platform
+ * So it must be called while holding write_lock(pcc_lock)
+ */
static int send_pcc_cmd(u16 cmd)
{
- int ret = -EIO;
+ int ret = -EIO, i;
struct acpi_pcct_shared_memory *generic_comm_base =
- (struct acpi_pcct_shared_memory *) pcc_comm_addr;
+ (struct acpi_pcct_shared_memory *) pcc_data.pcc_comm_addr;
static ktime_t last_cmd_cmpl_time, last_mpar_reset;
static int mpar_count;
unsigned int time_delta;
@@ -119,20 +239,29 @@
* the channel before writing to PCC space
*/
if (cmd == CMD_READ) {
- ret = check_pcc_chan();
+ /*
+ * If there are pending cpc_writes, then we stole the channel
+ * before write completion, so first send a WRITE command to
+ * platform
+ */
+ if (pcc_data.pending_pcc_write_cmd)
+ send_pcc_cmd(CMD_WRITE);
+
+ ret = check_pcc_chan(false);
if (ret)
- return ret;
- }
+ goto end;
+ } else /* CMD_WRITE */
+ pcc_data.pending_pcc_write_cmd = FALSE;
/*
* Handle the Minimum Request Turnaround Time(MRTT)
* "The minimum amount of time that OSPM must wait after the completion
* of a command before issuing the next command, in microseconds"
*/
- if (pcc_mrtt) {
+ if (pcc_data.pcc_mrtt) {
time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
- if (pcc_mrtt > time_delta)
- udelay(pcc_mrtt - time_delta);
+ if (pcc_data.pcc_mrtt > time_delta)
+ udelay(pcc_data.pcc_mrtt - time_delta);
}
/*
@@ -146,15 +275,16 @@
* not send the request to the platform after hitting the MPAR limit in
* any 60s window
*/
- if (pcc_mpar) {
+ if (pcc_data.pcc_mpar) {
if (mpar_count == 0) {
time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
if (time_delta < 60 * MSEC_PER_SEC) {
pr_debug("PCC cmd not sent due to MPAR limit");
- return -EIO;
+ ret = -EIO;
+ goto end;
}
last_mpar_reset = ktime_get();
- mpar_count = pcc_mpar;
+ mpar_count = pcc_data.pcc_mpar;
}
mpar_count--;
}
@@ -165,33 +295,43 @@
/* Flip CMD COMPLETE bit */
writew_relaxed(0, &generic_comm_base->status);
+ pcc_data.platform_owns_pcc = true;
+
/* Ring doorbell */
- ret = mbox_send_message(pcc_channel, &cmd);
+ ret = mbox_send_message(pcc_data.pcc_channel, &cmd);
if (ret < 0) {
pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n",
cmd, ret);
- return ret;
+ goto end;
}
- /*
- * For READs we need to ensure the cmd completed to ensure
- * the ensuing read()s can proceed. For WRITEs we dont care
- * because the actual write()s are done before coming here
- * and the next READ or WRITE will check if the channel
- * is busy/free at the entry of this call.
- *
- * If Minimum Request Turnaround Time is non-zero, we need
- * to record the completion time of both READ and WRITE
- * command for proper handling of MRTT, so we need to check
- * for pcc_mrtt in addition to CMD_READ
- */
- if (cmd == CMD_READ || pcc_mrtt) {
- ret = check_pcc_chan();
- if (pcc_mrtt)
- last_cmd_cmpl_time = ktime_get();
+ /* wait for completion and check for PCC errro bit */
+ ret = check_pcc_chan(true);
+
+ if (pcc_data.pcc_mrtt)
+ last_cmd_cmpl_time = ktime_get();
+
+ if (pcc_data.pcc_channel->mbox->txdone_irq)
+ mbox_chan_txdone(pcc_data.pcc_channel, ret);
+ else
+ mbox_client_txdone(pcc_data.pcc_channel, ret);
+
+end:
+ if (cmd == CMD_WRITE) {
+ if (unlikely(ret)) {
+ for_each_possible_cpu(i) {
+ struct cpc_desc *desc = per_cpu(cpc_desc_ptr, i);
+ if (!desc)
+ continue;
+
+ if (desc->write_cmd_id == pcc_data.pcc_write_cnt)
+ desc->write_cmd_status = ret;
+ }
+ }
+ pcc_data.pcc_write_cnt++;
+ wake_up_all(&pcc_data.pcc_write_wait_q);
}
- mbox_client_txdone(pcc_channel, ret);
return ret;
}
@@ -272,13 +412,13 @@
*
* Return: 0 for success or negative value for err.
*/
-int acpi_get_psd_map(struct cpudata **all_cpu_data)
+int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
{
int count_target;
int retval = 0;
unsigned int i, j;
cpumask_var_t covered_cpus;
- struct cpudata *pr, *match_pr;
+ struct cppc_cpudata *pr, *match_pr;
struct acpi_psd_package *pdomain;
struct acpi_psd_package *match_pdomain;
struct cpc_desc *cpc_ptr, *match_cpc_ptr;
@@ -394,14 +534,13 @@
static int register_pcc_channel(int pcc_subspace_idx)
{
struct acpi_pcct_hw_reduced *cppc_ss;
- unsigned int len;
u64 usecs_lat;
if (pcc_subspace_idx >= 0) {
- pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl,
+ pcc_data.pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl,
pcc_subspace_idx);
- if (IS_ERR(pcc_channel)) {
+ if (IS_ERR(pcc_data.pcc_channel)) {
pr_err("Failed to find PCC communication channel\n");
return -ENODEV;
}
@@ -412,7 +551,7 @@
* PCC channels) and stored pointers to the
* subspace communication region in con_priv.
*/
- cppc_ss = pcc_channel->con_priv;
+ cppc_ss = (pcc_data.pcc_channel)->con_priv;
if (!cppc_ss) {
pr_err("No PCC subspace found for CPPC\n");
@@ -420,35 +559,42 @@
}
/*
- * This is the shared communication region
- * for the OS and Platform to communicate over.
- */
- comm_base_addr = cppc_ss->base_address;
- len = cppc_ss->length;
-
- /*
* cppc_ss->latency is just a Nominal value. In reality
* the remote processor could be much slower to reply.
* So add an arbitrary amount of wait on top of Nominal.
*/
usecs_lat = NUM_RETRIES * cppc_ss->latency;
- deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
- pcc_mrtt = cppc_ss->min_turnaround_time;
- pcc_mpar = cppc_ss->max_access_rate;
+ pcc_data.deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
+ pcc_data.pcc_mrtt = cppc_ss->min_turnaround_time;
+ pcc_data.pcc_mpar = cppc_ss->max_access_rate;
+ pcc_data.pcc_nominal = cppc_ss->latency;
- pcc_comm_addr = acpi_os_ioremap(comm_base_addr, len);
- if (!pcc_comm_addr) {
+ pcc_data.pcc_comm_addr = acpi_os_ioremap(cppc_ss->base_address, cppc_ss->length);
+ if (!pcc_data.pcc_comm_addr) {
pr_err("Failed to ioremap PCC comm region mem\n");
return -ENOMEM;
}
/* Set flag so that we dont come here for each CPU. */
- pcc_channel_acquired = true;
+ pcc_data.pcc_channel_acquired = true;
}
return 0;
}
+/**
+ * cpc_ffh_supported() - check if FFH reading supported
+ *
+ * Check if the architecture has support for functional fixed hardware
+ * read/write capability.
+ *
+ * Return: true for supported, false for not supported
+ */
+bool __weak cpc_ffh_supported(void)
+{
+ return false;
+}
+
/*
* An example CPC table looks like the following.
*
@@ -507,6 +653,7 @@
union acpi_object *out_obj, *cpc_obj;
struct cpc_desc *cpc_ptr;
struct cpc_reg *gas_t;
+ struct device *cpu_dev;
acpi_handle handle = pr->handle;
unsigned int num_ent, i, cpc_rev;
acpi_status status;
@@ -545,6 +692,8 @@
goto out_free;
}
+ cpc_ptr->num_entries = num_ent;
+
/* Second entry should be revision. */
cpc_obj = &out_obj->package.elements[1];
if (cpc_obj->type == ACPI_TYPE_INTEGER) {
@@ -579,16 +728,27 @@
* so extract it only once.
*/
if (gas_t->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
- if (pcc_subspace_idx < 0)
- pcc_subspace_idx = gas_t->access_width;
- else if (pcc_subspace_idx != gas_t->access_width) {
+ if (pcc_data.pcc_subspace_idx < 0)
+ pcc_data.pcc_subspace_idx = gas_t->access_width;
+ else if (pcc_data.pcc_subspace_idx != gas_t->access_width) {
pr_debug("Mismatched PCC ids.\n");
goto out_free;
}
- } else if (gas_t->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
- /* Support only PCC and SYS MEM type regs */
- pr_debug("Unsupported register type: %d\n", gas_t->space_id);
- goto out_free;
+ } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ if (gas_t->address) {
+ void __iomem *addr;
+
+ addr = ioremap(gas_t->address, gas_t->bit_width/8);
+ if (!addr)
+ goto out_free;
+ cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr;
+ }
+ } else {
+ if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
+ /* Support only PCC ,SYS MEM and FFH type regs */
+ pr_debug("Unsupported register type: %d\n", gas_t->space_id);
+ goto out_free;
+ }
}
cpc_ptr->cpc_regs[i-2].type = ACPI_TYPE_BUFFER;
@@ -607,10 +767,13 @@
goto out_free;
/* Register PCC channel once for all CPUs. */
- if (!pcc_channel_acquired) {
- ret = register_pcc_channel(pcc_subspace_idx);
+ if (!pcc_data.pcc_channel_acquired) {
+ ret = register_pcc_channel(pcc_data.pcc_subspace_idx);
if (ret)
goto out_free;
+
+ init_rwsem(&pcc_data.pcc_lock);
+ init_waitqueue_head(&pcc_data.pcc_write_wait_q);
}
/* Plug PSD data into this CPUs CPC descriptor. */
@@ -619,10 +782,27 @@
/* Everything looks okay */
pr_debug("Parsed CPC struct for CPU: %d\n", pr->id);
+ /* Add per logical CPU nodes for reading its feedback counters. */
+ cpu_dev = get_cpu_device(pr->id);
+ if (!cpu_dev)
+ goto out_free;
+
+ ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj,
+ "acpi_cppc");
+ if (ret)
+ goto out_free;
+
kfree(output.pointer);
return 0;
out_free:
+ /* Free all the mapped sys mem areas for this CPU */
+ for (i = 2; i < cpc_ptr->num_entries; i++) {
+ void __iomem *addr = cpc_ptr->cpc_regs[i-2].sys_mem_vaddr;
+
+ if (addr)
+ iounmap(addr);
+ }
kfree(cpc_ptr);
out_buf_free:
@@ -640,26 +820,82 @@
void acpi_cppc_processor_exit(struct acpi_processor *pr)
{
struct cpc_desc *cpc_ptr;
+ unsigned int i;
+ void __iomem *addr;
+
cpc_ptr = per_cpu(cpc_desc_ptr, pr->id);
+
+ /* Free all the mapped sys mem areas for this CPU */
+ for (i = 2; i < cpc_ptr->num_entries; i++) {
+ addr = cpc_ptr->cpc_regs[i-2].sys_mem_vaddr;
+ if (addr)
+ iounmap(addr);
+ }
+
+ kobject_put(&cpc_ptr->kobj);
kfree(cpc_ptr);
}
EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit);
+/**
+ * cpc_read_ffh() - Read FFH register
+ * @cpunum: cpu number to read
+ * @reg: cppc register information
+ * @val: place holder for return value
+ *
+ * Read bit_width bits from a specified address and bit_offset
+ *
+ * Return: 0 for success and error code
+ */
+int __weak cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+ return -ENOTSUPP;
+}
+
+/**
+ * cpc_write_ffh() - Write FFH register
+ * @cpunum: cpu number to write
+ * @reg: cppc register information
+ * @val: value to write
+ *
+ * Write value of bit_width bits to a specified address and bit_offset
+ *
+ * Return: 0 for success and error code
+ */
+int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+ return -ENOTSUPP;
+}
+
/*
* Since cpc_read and cpc_write are called while holding pcc_lock, it should be
* as fast as possible. We have already mapped the PCC subspace during init, so
* we can directly write to it.
*/
-static int cpc_read(struct cpc_reg *reg, u64 *val)
+static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
{
int ret_val = 0;
+ void __iomem *vaddr = 0;
+ struct cpc_reg *reg = ®_res->cpc_entry.reg;
+
+ if (reg_res->type == ACPI_TYPE_INTEGER) {
+ *val = reg_res->cpc_entry.int_value;
+ return ret_val;
+ }
*val = 0;
- if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
- void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+ vaddr = GET_PCC_VADDR(reg->address);
+ else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ vaddr = reg_res->sys_mem_vaddr;
+ else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
+ return cpc_read_ffh(cpu, reg, val);
+ else
+ return acpi_os_read_memory((acpi_physical_address)reg->address,
+ val, reg->bit_width);
- switch (reg->bit_width) {
+ switch (reg->bit_width) {
case 8:
*val = readb_relaxed(vaddr);
break;
@@ -674,23 +910,30 @@
break;
default:
pr_debug("Error: Cannot read %u bit width from PCC\n",
- reg->bit_width);
+ reg->bit_width);
ret_val = -EFAULT;
- }
- } else
- ret_val = acpi_os_read_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
+ }
+
return ret_val;
}
-static int cpc_write(struct cpc_reg *reg, u64 val)
+static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
{
int ret_val = 0;
+ void __iomem *vaddr = 0;
+ struct cpc_reg *reg = ®_res->cpc_entry.reg;
- if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
- void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+ vaddr = GET_PCC_VADDR(reg->address);
+ else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ vaddr = reg_res->sys_mem_vaddr;
+ else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
+ return cpc_write_ffh(cpu, reg, val);
+ else
+ return acpi_os_write_memory((acpi_physical_address)reg->address,
+ val, reg->bit_width);
- switch (reg->bit_width) {
+ switch (reg->bit_width) {
case 8:
writeb_relaxed(val, vaddr);
break;
@@ -705,13 +948,11 @@
break;
default:
pr_debug("Error: Cannot write %u bit width to PCC\n",
- reg->bit_width);
+ reg->bit_width);
ret_val = -EFAULT;
break;
- }
- } else
- ret_val = acpi_os_write_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
+ }
+
return ret_val;
}
@@ -727,8 +968,8 @@
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
struct cpc_register_resource *highest_reg, *lowest_reg, *ref_perf,
*nom_perf;
- u64 high, low, ref, nom;
- int ret = 0;
+ u64 high, low, nom;
+ int ret = 0, regs_in_pcc = 0;
if (!cpc_desc) {
pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
@@ -740,13 +981,11 @@
ref_perf = &cpc_desc->cpc_regs[REFERENCE_PERF];
nom_perf = &cpc_desc->cpc_regs[NOMINAL_PERF];
- spin_lock(&pcc_lock);
-
/* Are any of the regs PCC ?*/
- if ((highest_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
- (lowest_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
- (ref_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
- (nom_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
+ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
+ CPC_IN_PCC(ref_perf) || CPC_IN_PCC(nom_perf)) {
+ regs_in_pcc = 1;
+ down_write(&pcc_data.pcc_lock);
/* Ring doorbell once to update PCC subspace */
if (send_pcc_cmd(CMD_READ) < 0) {
ret = -EIO;
@@ -754,26 +993,21 @@
}
}
- cpc_read(&highest_reg->cpc_entry.reg, &high);
+ cpc_read(cpunum, highest_reg, &high);
perf_caps->highest_perf = high;
- cpc_read(&lowest_reg->cpc_entry.reg, &low);
+ cpc_read(cpunum, lowest_reg, &low);
perf_caps->lowest_perf = low;
- cpc_read(&ref_perf->cpc_entry.reg, &ref);
- perf_caps->reference_perf = ref;
-
- cpc_read(&nom_perf->cpc_entry.reg, &nom);
+ cpc_read(cpunum, nom_perf, &nom);
perf_caps->nominal_perf = nom;
- if (!ref)
- perf_caps->reference_perf = perf_caps->nominal_perf;
-
if (!high || !low || !nom)
ret = -EFAULT;
out_err:
- spin_unlock(&pcc_lock);
+ if (regs_in_pcc)
+ up_write(&pcc_data.pcc_lock);
return ret;
}
EXPORT_SYMBOL_GPL(cppc_get_perf_caps);
@@ -788,9 +1022,10 @@
int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
- struct cpc_register_resource *delivered_reg, *reference_reg;
- u64 delivered, reference;
- int ret = 0;
+ struct cpc_register_resource *delivered_reg, *reference_reg,
+ *ref_perf_reg, *ctr_wrap_reg;
+ u64 delivered, reference, ref_perf, ctr_wrap_time;
+ int ret = 0, regs_in_pcc = 0;
if (!cpc_desc) {
pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
@@ -799,12 +1034,21 @@
delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR];
reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR];
+ ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF];
+ ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME];
- spin_lock(&pcc_lock);
+ /*
+ * If refernce perf register is not supported then we should
+ * use the nominal perf value
+ */
+ if (!CPC_SUPPORTED(ref_perf_reg))
+ ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF];
/* Are any of the regs PCC ?*/
- if ((delivered_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
- (reference_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
+ if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) ||
+ CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) {
+ down_write(&pcc_data.pcc_lock);
+ regs_in_pcc = 1;
/* Ring doorbell once to update PCC subspace */
if (send_pcc_cmd(CMD_READ) < 0) {
ret = -EIO;
@@ -812,25 +1056,31 @@
}
}
- cpc_read(&delivered_reg->cpc_entry.reg, &delivered);
- cpc_read(&reference_reg->cpc_entry.reg, &reference);
+ cpc_read(cpunum, delivered_reg, &delivered);
+ cpc_read(cpunum, reference_reg, &reference);
+ cpc_read(cpunum, ref_perf_reg, &ref_perf);
- if (!delivered || !reference) {
+ /*
+ * Per spec, if ctr_wrap_time optional register is unsupported, then the
+ * performance counters are assumed to never wrap during the lifetime of
+ * platform
+ */
+ ctr_wrap_time = (u64)(~((u64)0));
+ if (CPC_SUPPORTED(ctr_wrap_reg))
+ cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time);
+
+ if (!delivered || !reference || !ref_perf) {
ret = -EFAULT;
goto out_err;
}
perf_fb_ctrs->delivered = delivered;
perf_fb_ctrs->reference = reference;
-
- perf_fb_ctrs->delivered -= perf_fb_ctrs->prev_delivered;
- perf_fb_ctrs->reference -= perf_fb_ctrs->prev_reference;
-
- perf_fb_ctrs->prev_delivered = delivered;
- perf_fb_ctrs->prev_reference = reference;
-
+ perf_fb_ctrs->reference_perf = ref_perf;
+ perf_fb_ctrs->ctr_wrap_time = ctr_wrap_time;
out_err:
- spin_unlock(&pcc_lock);
+ if (regs_in_pcc)
+ up_write(&pcc_data.pcc_lock);
return ret;
}
EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
@@ -855,30 +1105,142 @@
desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
- spin_lock(&pcc_lock);
-
- /* If this is PCC reg, check if channel is free before writing */
- if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
- ret = check_pcc_chan();
- if (ret)
- goto busy_channel;
+ /*
+ * This is Phase-I where we want to write to CPC registers
+ * -> We want all CPUs to be able to execute this phase in parallel
+ *
+ * Since read_lock can be acquired by multiple CPUs simultaneously we
+ * achieve that goal here
+ */
+ if (CPC_IN_PCC(desired_reg)) {
+ down_read(&pcc_data.pcc_lock); /* BEGIN Phase-I */
+ if (pcc_data.platform_owns_pcc) {
+ ret = check_pcc_chan(false);
+ if (ret) {
+ up_read(&pcc_data.pcc_lock);
+ return ret;
+ }
+ }
+ /*
+ * Update the pending_write to make sure a PCC CMD_READ will not
+ * arrive and steal the channel during the switch to write lock
+ */
+ pcc_data.pending_pcc_write_cmd = true;
+ cpc_desc->write_cmd_id = pcc_data.pcc_write_cnt;
+ cpc_desc->write_cmd_status = 0;
}
/*
* Skip writing MIN/MAX until Linux knows how to come up with
* useful values.
*/
- cpc_write(&desired_reg->cpc_entry.reg, perf_ctrls->desired_perf);
+ cpc_write(cpu, desired_reg, perf_ctrls->desired_perf);
- /* Is this a PCC reg ?*/
- if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
- /* Ring doorbell so Remote can get our perf request. */
- if (send_pcc_cmd(CMD_WRITE) < 0)
- ret = -EIO;
+ if (CPC_IN_PCC(desired_reg))
+ up_read(&pcc_data.pcc_lock); /* END Phase-I */
+ /*
+ * This is Phase-II where we transfer the ownership of PCC to Platform
+ *
+ * Short Summary: Basically if we think of a group of cppc_set_perf
+ * requests that happened in short overlapping interval. The last CPU to
+ * come out of Phase-I will enter Phase-II and ring the doorbell.
+ *
+ * We have the following requirements for Phase-II:
+ * 1. We want to execute Phase-II only when there are no CPUs
+ * currently executing in Phase-I
+ * 2. Once we start Phase-II we want to avoid all other CPUs from
+ * entering Phase-I.
+ * 3. We want only one CPU among all those who went through Phase-I
+ * to run phase-II
+ *
+ * If write_trylock fails to get the lock and doesn't transfer the
+ * PCC ownership to the platform, then one of the following will be TRUE
+ * 1. There is at-least one CPU in Phase-I which will later execute
+ * write_trylock, so the CPUs in Phase-I will be responsible for
+ * executing the Phase-II.
+ * 2. Some other CPU has beaten this CPU to successfully execute the
+ * write_trylock and has already acquired the write_lock. We know for a
+ * fact it(other CPU acquiring the write_lock) couldn't have happened
+ * before this CPU's Phase-I as we held the read_lock.
+ * 3. Some other CPU executing pcc CMD_READ has stolen the
+ * down_write, in which case, send_pcc_cmd will check for pending
+ * CMD_WRITE commands by checking the pending_pcc_write_cmd.
+ * So this CPU can be certain that its request will be delivered
+ * So in all cases, this CPU knows that its request will be delivered
+ * by another CPU and can return
+ *
+ * After getting the down_write we still need to check for
+ * pending_pcc_write_cmd to take care of the following scenario
+ * The thread running this code could be scheduled out between
+ * Phase-I and Phase-II. Before it is scheduled back on, another CPU
+ * could have delivered the request to Platform by triggering the
+ * doorbell and transferred the ownership of PCC to platform. So this
+ * avoids triggering an unnecessary doorbell and more importantly before
+ * triggering the doorbell it makes sure that the PCC channel ownership
+ * is still with OSPM.
+ * pending_pcc_write_cmd can also be cleared by a different CPU, if
+ * there was a pcc CMD_READ waiting on down_write and it steals the lock
+ * before the pcc CMD_WRITE is completed. pcc_send_cmd checks for this
+ * case during a CMD_READ and if there are pending writes it delivers
+ * the write command before servicing the read command
+ */
+ if (CPC_IN_PCC(desired_reg)) {
+ if (down_write_trylock(&pcc_data.pcc_lock)) { /* BEGIN Phase-II */
+ /* Update only if there are pending write commands */
+ if (pcc_data.pending_pcc_write_cmd)
+ send_pcc_cmd(CMD_WRITE);
+ up_write(&pcc_data.pcc_lock); /* END Phase-II */
+ } else
+ /* Wait until pcc_write_cnt is updated by send_pcc_cmd */
+ wait_event(pcc_data.pcc_write_wait_q,
+ cpc_desc->write_cmd_id != pcc_data.pcc_write_cnt);
+
+ /* send_pcc_cmd updates the status in case of failure */
+ ret = cpc_desc->write_cmd_status;
}
-busy_channel:
- spin_unlock(&pcc_lock);
-
return ret;
}
EXPORT_SYMBOL_GPL(cppc_set_perf);
+
+/**
+ * cppc_get_transition_latency - returns frequency transition latency in ns
+ *
+ * ACPI CPPC does not explicitly specifiy how a platform can specify the
+ * transition latency for perfromance change requests. The closest we have
+ * is the timing information from the PCCT tables which provides the info
+ * on the number and frequency of PCC commands the platform can handle.
+ */
+unsigned int cppc_get_transition_latency(int cpu_num)
+{
+ /*
+ * Expected transition latency is based on the PCCT timing values
+ * Below are definition from ACPI spec:
+ * pcc_nominal- Expected latency to process a command, in microseconds
+ * pcc_mpar - The maximum number of periodic requests that the subspace
+ * channel can support, reported in commands per minute. 0
+ * indicates no limitation.
+ * pcc_mrtt - The minimum amount of time that OSPM must wait after the
+ * completion of a command before issuing the next command,
+ * in microseconds.
+ */
+ unsigned int latency_ns = 0;
+ struct cpc_desc *cpc_desc;
+ struct cpc_register_resource *desired_reg;
+
+ cpc_desc = per_cpu(cpc_desc_ptr, cpu_num);
+ if (!cpc_desc)
+ return CPUFREQ_ETERNAL;
+
+ desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+ if (!CPC_IN_PCC(desired_reg))
+ return CPUFREQ_ETERNAL;
+
+ if (pcc_data.pcc_mpar)
+ latency_ns = 60 * (1000 * 1000 * 1000 / pcc_data.pcc_mpar);
+
+ latency_ns = max(latency_ns, pcc_data.pcc_nominal * 1000);
+ latency_ns = max(latency_ns, pcc_data.pcc_mrtt * 1000);
+
+ return latency_ns;
+}
+EXPORT_SYMBOL_GPL(cppc_get_transition_latency);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index e7bd57c..6805310 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -104,10 +104,12 @@
#define ACPI_EC_MAX_QUERIES 16 /* Maximum number of parallel queries */
enum {
+ EC_FLAGS_QUERY_ENABLED, /* Query is enabled */
EC_FLAGS_QUERY_PENDING, /* Query is pending */
EC_FLAGS_QUERY_GUARDING, /* Guard for SCI_EVT check */
EC_FLAGS_GPE_HANDLER_INSTALLED, /* GPE handler installed */
EC_FLAGS_EC_HANDLER_INSTALLED, /* OpReg handler installed */
+ EC_FLAGS_EVT_HANDLER_INSTALLED, /* _Qxx handlers installed */
EC_FLAGS_STARTED, /* Driver is started */
EC_FLAGS_STOPPED, /* Driver is stopped */
EC_FLAGS_COMMAND_STORM, /* GPE storms occurred to the
@@ -145,6 +147,10 @@
module_param(ec_storm_threshold, uint, 0644);
MODULE_PARM_DESC(ec_storm_threshold, "Maxim false GPE numbers not considered as GPE storm");
+static bool ec_freeze_events __read_mostly = true;
+module_param(ec_freeze_events, bool, 0644);
+MODULE_PARM_DESC(ec_freeze_events, "Disabling event handling during suspend/resume");
+
struct acpi_ec_query_handler {
struct list_head node;
acpi_ec_query_func func;
@@ -179,6 +185,7 @@
struct acpi_ec *boot_ec, *first_ec;
EXPORT_SYMBOL(first_ec);
+static bool boot_ec_is_ecdt = false;
static struct workqueue_struct *ec_query_wq;
static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
@@ -239,6 +246,30 @@
!test_bit(EC_FLAGS_STOPPED, &ec->flags);
}
+static bool acpi_ec_event_enabled(struct acpi_ec *ec)
+{
+ /*
+ * There is an OSPM early stage logic. During the early stages
+ * (boot/resume), OSPMs shouldn't enable the event handling, only
+ * the EC transactions are allowed to be performed.
+ */
+ if (!test_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+ return false;
+ /*
+ * However, disabling the event handling is experimental for late
+ * stage (suspend), and is controlled by the boot parameter of
+ * "ec_freeze_events":
+ * 1. true: The EC event handling is disabled before entering
+ * the noirq stage.
+ * 2. false: The EC event handling is automatically disabled as
+ * soon as the EC driver is stopped.
+ */
+ if (ec_freeze_events)
+ return acpi_ec_started(ec);
+ else
+ return test_bit(EC_FLAGS_STARTED, &ec->flags);
+}
+
static bool acpi_ec_flushed(struct acpi_ec *ec)
{
return ec->reference_count == 1;
@@ -429,7 +460,8 @@
static void acpi_ec_submit_query(struct acpi_ec *ec)
{
- if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
+ if (acpi_ec_event_enabled(ec) &&
+ !test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
ec_dbg_evt("Command(%s) submitted/blocked",
acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
ec->nr_pending_queries++;
@@ -446,6 +478,86 @@
}
}
+static inline void __acpi_ec_enable_event(struct acpi_ec *ec)
+{
+ if (!test_and_set_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+ ec_log_drv("event unblocked");
+ if (!test_bit(EC_FLAGS_QUERY_PENDING, &ec->flags))
+ advance_transaction(ec);
+}
+
+static inline void __acpi_ec_disable_event(struct acpi_ec *ec)
+{
+ if (test_and_clear_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+ ec_log_drv("event blocked");
+}
+
+/*
+ * Process _Q events that might have accumulated in the EC.
+ * Run with locked ec mutex.
+ */
+static void acpi_ec_clear(struct acpi_ec *ec)
+{
+ int i, status;
+ u8 value = 0;
+
+ for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) {
+ status = acpi_ec_query(ec, &value);
+ if (status || !value)
+ break;
+ }
+ if (unlikely(i == ACPI_EC_CLEAR_MAX))
+ pr_warn("Warning: Maximum of %d stale EC events cleared\n", i);
+ else
+ pr_info("%d stale EC events cleared\n", i);
+}
+
+static void acpi_ec_enable_event(struct acpi_ec *ec)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
+ if (acpi_ec_started(ec))
+ __acpi_ec_enable_event(ec);
+ spin_unlock_irqrestore(&ec->lock, flags);
+
+ /* Drain additional events if hardware requires that */
+ if (EC_FLAGS_CLEAR_ON_RESUME)
+ acpi_ec_clear(ec);
+}
+
+static bool acpi_ec_query_flushed(struct acpi_ec *ec)
+{
+ bool flushed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
+ flushed = !ec->nr_pending_queries;
+ spin_unlock_irqrestore(&ec->lock, flags);
+ return flushed;
+}
+
+static void __acpi_ec_flush_event(struct acpi_ec *ec)
+{
+ /*
+ * When ec_freeze_events is true, we need to flush events in
+ * the proper position before entering the noirq stage.
+ */
+ wait_event(ec->wait, acpi_ec_query_flushed(ec));
+ if (ec_query_wq)
+ flush_workqueue(ec_query_wq);
+}
+
+static void acpi_ec_disable_event(struct acpi_ec *ec)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ec->lock, flags);
+ __acpi_ec_disable_event(ec);
+ spin_unlock_irqrestore(&ec->lock, flags);
+ __acpi_ec_flush_event(ec);
+}
+
static bool acpi_ec_guard_event(struct acpi_ec *ec)
{
bool guarded = true;
@@ -832,27 +944,6 @@
}
EXPORT_SYMBOL(ec_get_handle);
-/*
- * Process _Q events that might have accumulated in the EC.
- * Run with locked ec mutex.
- */
-static void acpi_ec_clear(struct acpi_ec *ec)
-{
- int i, status;
- u8 value = 0;
-
- for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) {
- status = acpi_ec_query(ec, &value);
- if (status || !value)
- break;
- }
-
- if (unlikely(i == ACPI_EC_CLEAR_MAX))
- pr_warn("Warning: Maximum of %d stale EC events cleared\n", i);
- else
- pr_info("%d stale EC events cleared\n", i);
-}
-
static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
{
unsigned long flags;
@@ -896,7 +987,8 @@
if (!suspending) {
acpi_ec_complete_request(ec);
ec_dbg_ref(ec, "Decrease driver");
- }
+ } else if (!ec_freeze_events)
+ __acpi_ec_disable_event(ec);
clear_bit(EC_FLAGS_STARTED, &ec->flags);
clear_bit(EC_FLAGS_STOPPED, &ec->flags);
ec_log_drv("EC stopped");
@@ -919,20 +1011,6 @@
void acpi_ec_unblock_transactions(void)
{
- struct acpi_ec *ec = first_ec;
-
- if (!ec)
- return;
-
- /* Allow transactions to be carried out again */
- acpi_ec_start(ec, true);
-
- if (EC_FLAGS_CLEAR_ON_RESUME)
- acpi_ec_clear(ec);
-}
-
-void acpi_ec_unblock_transactions_early(void)
-{
/*
* Allow transactions to happen again (this function is called from
* atomic context during wakeup, so we don't need to acquire the mutex).
@@ -1228,13 +1306,21 @@
static acpi_status
ec_parse_io_ports(struct acpi_resource *resource, void *context);
-static struct acpi_ec *make_acpi_ec(void)
+static void acpi_ec_free(struct acpi_ec *ec)
+{
+ if (first_ec == ec)
+ first_ec = NULL;
+ if (boot_ec == ec)
+ boot_ec = NULL;
+ kfree(ec);
+}
+
+static struct acpi_ec *acpi_ec_alloc(void)
{
struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
if (!ec)
return NULL;
- ec->flags = 1 << EC_FLAGS_QUERY_PENDING;
mutex_init(&ec->mutex);
init_waitqueue_head(&ec->wait);
INIT_LIST_HEAD(&ec->list);
@@ -1290,7 +1376,12 @@
return AE_CTRL_TERMINATE;
}
-static int ec_install_handlers(struct acpi_ec *ec)
+/*
+ * Note: This function returns an error code only when the address space
+ * handler is not installed, which means "not able to handle
+ * transactions".
+ */
+static int ec_install_handlers(struct acpi_ec *ec, bool handle_events)
{
acpi_status status;
@@ -1319,6 +1410,16 @@
set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
}
+ if (!handle_events)
+ return 0;
+
+ if (!test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+ /* Find and register all query methods */
+ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
+ acpi_ec_register_query_methods,
+ NULL, ec, NULL);
+ set_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+ }
if (!test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
ACPI_GPE_EDGE_TRIGGERED,
@@ -1329,6 +1430,9 @@
if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
ec->reference_count >= 1)
acpi_ec_enable_gpe(ec, true);
+
+ /* EC is fully operational, allow queries */
+ acpi_ec_enable_event(ec);
}
}
@@ -1363,23 +1467,104 @@
pr_err("failed to remove gpe handler\n");
clear_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
}
+ if (test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+ acpi_ec_remove_query_handlers(ec, true, 0);
+ clear_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+ }
}
-static struct acpi_ec *acpi_ec_alloc(void)
+static int acpi_ec_setup(struct acpi_ec *ec, bool handle_events)
{
- struct acpi_ec *ec;
+ int ret;
- /* Check for boot EC */
- if (boot_ec) {
- ec = boot_ec;
- boot_ec = NULL;
- ec_remove_handlers(ec);
- if (first_ec == ec)
- first_ec = NULL;
- } else {
- ec = make_acpi_ec();
+ ret = ec_install_handlers(ec, handle_events);
+ if (ret)
+ return ret;
+
+ /* First EC capable of handling transactions */
+ if (!first_ec) {
+ first_ec = ec;
+ acpi_handle_info(first_ec->handle, "Used as first EC\n");
}
- return ec;
+
+ acpi_handle_info(ec->handle,
+ "GPE=0x%lx, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n",
+ ec->gpe, ec->command_addr, ec->data_addr);
+ return ret;
+}
+
+static int acpi_config_boot_ec(struct acpi_ec *ec, acpi_handle handle,
+ bool handle_events, bool is_ecdt)
+{
+ int ret;
+
+ /*
+ * Changing the ACPI handle results in a re-configuration of the
+ * boot EC. And if it happens after the namespace initialization,
+ * it causes _REG evaluations.
+ */
+ if (boot_ec && boot_ec->handle != handle)
+ ec_remove_handlers(boot_ec);
+
+ /* Unset old boot EC */
+ if (boot_ec != ec)
+ acpi_ec_free(boot_ec);
+
+ /*
+ * ECDT device creation is split into acpi_ec_ecdt_probe() and
+ * acpi_ec_ecdt_start(). This function takes care of completing the
+ * ECDT parsing logic as the handle update should be performed
+ * between the installation/uninstallation of the handlers.
+ */
+ if (ec->handle != handle)
+ ec->handle = handle;
+
+ ret = acpi_ec_setup(ec, handle_events);
+ if (ret)
+ return ret;
+
+ /* Set new boot EC */
+ if (!boot_ec) {
+ boot_ec = ec;
+ boot_ec_is_ecdt = is_ecdt;
+ }
+
+ acpi_handle_info(boot_ec->handle,
+ "Used as boot %s EC to handle transactions%s\n",
+ is_ecdt ? "ECDT" : "DSDT",
+ handle_events ? " and events" : "");
+ return ret;
+}
+
+static bool acpi_ec_ecdt_get_handle(acpi_handle *phandle)
+{
+ struct acpi_table_ecdt *ecdt_ptr;
+ acpi_status status;
+ acpi_handle handle;
+
+ status = acpi_get_table(ACPI_SIG_ECDT, 1,
+ (struct acpi_table_header **)&ecdt_ptr);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ status = acpi_get_handle(NULL, ecdt_ptr->id, &handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ *phandle = handle;
+ return true;
+}
+
+static bool acpi_is_boot_ec(struct acpi_ec *ec)
+{
+ if (!boot_ec)
+ return false;
+ if (ec->handle == boot_ec->handle &&
+ ec->gpe == boot_ec->gpe &&
+ ec->command_addr == boot_ec->command_addr &&
+ ec->data_addr == boot_ec->data_addr)
+ return true;
+ return false;
}
static int acpi_ec_add(struct acpi_device *device)
@@ -1395,16 +1580,21 @@
return -ENOMEM;
if (ec_parse_device(device->handle, 0, ec, NULL) !=
AE_CTRL_TERMINATE) {
- kfree(ec);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_alloc;
}
- /* Find and register all query methods */
- acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
- acpi_ec_register_query_methods, NULL, ec, NULL);
+ if (acpi_is_boot_ec(ec)) {
+ boot_ec_is_ecdt = false;
+ acpi_handle_debug(ec->handle, "duplicated.\n");
+ acpi_ec_free(ec);
+ ec = boot_ec;
+ ret = acpi_config_boot_ec(ec, ec->handle, true, false);
+ } else
+ ret = acpi_ec_setup(ec, true);
+ if (ret)
+ goto err_query;
- if (!first_ec)
- first_ec = ec;
device->driver_data = ec;
ret = !!request_region(ec->data_addr, 1, "EC data");
@@ -1412,20 +1602,17 @@
ret = !!request_region(ec->command_addr, 1, "EC cmd");
WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr);
- pr_info("GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
- ec->gpe, ec->command_addr, ec->data_addr);
-
- ret = ec_install_handlers(ec);
-
/* Reprobe devices depending on the EC */
acpi_walk_dep_device_list(ec->handle);
+ acpi_handle_debug(ec->handle, "enumerated.\n");
+ return 0;
- /* EC is fully operational, allow queries */
- clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
-
- /* Clear stale _Q events if hardware might require that */
- if (EC_FLAGS_CLEAR_ON_RESUME)
- acpi_ec_clear(ec);
+err_query:
+ if (ec != boot_ec)
+ acpi_ec_remove_query_handlers(ec, true, 0);
+err_alloc:
+ if (ec != boot_ec)
+ acpi_ec_free(ec);
return ret;
}
@@ -1437,14 +1624,13 @@
return -EINVAL;
ec = acpi_driver_data(device);
- ec_remove_handlers(ec);
- acpi_ec_remove_query_handlers(ec, true, 0);
release_region(ec->data_addr, 1);
release_region(ec->command_addr, 1);
device->driver_data = NULL;
- if (ec == first_ec)
- first_ec = NULL;
- kfree(ec);
+ if (ec != boot_ec) {
+ ec_remove_handlers(ec);
+ acpi_ec_free(ec);
+ }
return 0;
}
@@ -1486,9 +1672,8 @@
if (!ec)
return -ENOMEM;
/*
- * Finding EC from DSDT if there is no ECDT EC available. When this
- * function is invoked, ACPI tables have been fully loaded, we can
- * walk namespace now.
+ * At this point, the namespace is initialized, so start to find
+ * the namespace objects.
*/
status = acpi_get_devices(ec_device_ids[0].id,
ec_parse_device, ec, NULL);
@@ -1496,16 +1681,47 @@
ret = -ENODEV;
goto error;
}
- ret = ec_install_handlers(ec);
-
+ /*
+ * When the DSDT EC is available, always re-configure boot EC to
+ * have _REG evaluated. _REG can only be evaluated after the
+ * namespace initialization.
+ * At this point, the GPE is not fully initialized, so do not to
+ * handle the events.
+ */
+ ret = acpi_config_boot_ec(ec, ec->handle, false, false);
error:
if (ret)
- kfree(ec);
- else
- first_ec = boot_ec = ec;
+ acpi_ec_free(ec);
return ret;
}
+/*
+ * If the DSDT EC is not functioning, we still need to prepare a fully
+ * functioning ECDT EC first in order to handle the events.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=115021
+ */
+int __init acpi_ec_ecdt_start(void)
+{
+ acpi_handle handle;
+
+ if (!boot_ec)
+ return -ENODEV;
+ /*
+ * The DSDT EC should have already been started in
+ * acpi_ec_add().
+ */
+ if (!boot_ec_is_ecdt)
+ return -ENODEV;
+
+ /*
+ * At this point, the namespace and the GPE is initialized, so
+ * start to find the namespace objects and handle the events.
+ */
+ if (!acpi_ec_ecdt_get_handle(&handle))
+ return -ENODEV;
+ return acpi_config_boot_ec(boot_ec, handle, true, true);
+}
+
#if 0
/*
* Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
@@ -1600,7 +1816,6 @@
goto error;
}
- pr_info("EC description table is found, configuring boot EC\n");
if (EC_FLAGS_CORRECT_ECDT) {
ec->command_addr = ecdt_ptr->data.address;
ec->data_addr = ecdt_ptr->control.address;
@@ -1609,16 +1824,90 @@
ec->data_addr = ecdt_ptr->data.address;
}
ec->gpe = ecdt_ptr->gpe;
- ec->handle = ACPI_ROOT_OBJECT;
- ret = ec_install_handlers(ec);
+
+ /*
+ * At this point, the namespace is not initialized, so do not find
+ * the namespace objects, or handle the events.
+ */
+ ret = acpi_config_boot_ec(ec, ACPI_ROOT_OBJECT, false, true);
error:
if (ret)
- kfree(ec);
- else
- first_ec = boot_ec = ec;
+ acpi_ec_free(ec);
return ret;
}
+#ifdef CONFIG_PM_SLEEP
+static void acpi_ec_enter_noirq(struct acpi_ec *ec)
+{
+ unsigned long flags;
+
+ if (ec == first_ec) {
+ spin_lock_irqsave(&ec->lock, flags);
+ ec->saved_busy_polling = ec_busy_polling;
+ ec->saved_polling_guard = ec_polling_guard;
+ ec_busy_polling = true;
+ ec_polling_guard = 0;
+ ec_log_drv("interrupt blocked");
+ spin_unlock_irqrestore(&ec->lock, flags);
+ }
+}
+
+static void acpi_ec_leave_noirq(struct acpi_ec *ec)
+{
+ unsigned long flags;
+
+ if (ec == first_ec) {
+ spin_lock_irqsave(&ec->lock, flags);
+ ec_busy_polling = ec->saved_busy_polling;
+ ec_polling_guard = ec->saved_polling_guard;
+ ec_log_drv("interrupt unblocked");
+ spin_unlock_irqrestore(&ec->lock, flags);
+ }
+}
+
+static int acpi_ec_suspend_noirq(struct device *dev)
+{
+ struct acpi_ec *ec =
+ acpi_driver_data(to_acpi_device(dev));
+
+ acpi_ec_enter_noirq(ec);
+ return 0;
+}
+
+static int acpi_ec_resume_noirq(struct device *dev)
+{
+ struct acpi_ec *ec =
+ acpi_driver_data(to_acpi_device(dev));
+
+ acpi_ec_leave_noirq(ec);
+ return 0;
+}
+
+static int acpi_ec_suspend(struct device *dev)
+{
+ struct acpi_ec *ec =
+ acpi_driver_data(to_acpi_device(dev));
+
+ if (ec_freeze_events)
+ acpi_ec_disable_event(ec);
+ return 0;
+}
+
+static int acpi_ec_resume(struct device *dev)
+{
+ struct acpi_ec *ec =
+ acpi_driver_data(to_acpi_device(dev));
+
+ acpi_ec_enable_event(ec);
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops acpi_ec_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend_noirq, acpi_ec_resume_noirq)
+ SET_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend, acpi_ec_resume)
+};
+
static int param_set_event_clearing(const char *val, struct kernel_param *kp)
{
int result = 0;
@@ -1664,6 +1953,7 @@
.add = acpi_ec_add,
.remove = acpi_ec_remove,
},
+ .drv.pm = &acpi_ec_pm,
};
static inline int acpi_ec_query_init(void)
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 940218f..1b41a27 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -40,10 +40,8 @@
void acpi_container_init(void);
void acpi_memory_hotplug_init(void);
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
-int acpi_ioapic_add(struct acpi_pci_root *root);
int acpi_ioapic_remove(struct acpi_pci_root *root);
#else
-static inline int acpi_ioapic_add(struct acpi_pci_root *root) { return 0; }
static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
#endif
#ifdef CONFIG_ACPI_DOCK
@@ -116,7 +114,6 @@
bool acpi_device_is_battery(struct acpi_device *adev);
bool acpi_device_is_first_physical_node(struct acpi_device *adev,
const struct device *dev);
-struct device *acpi_get_first_physical_node(struct acpi_device *adev);
/* --------------------------------------------------------------------------
Device Matching and Notification
@@ -174,6 +171,8 @@
struct work_struct work;
unsigned long timestamp;
unsigned long nr_pending_queries;
+ bool saved_busy_polling;
+ unsigned int saved_polling_guard;
};
extern struct acpi_ec *first_ec;
@@ -185,9 +184,9 @@
int acpi_ec_init(void);
int acpi_ec_ecdt_probe(void);
int acpi_ec_dsdt_probe(void);
+int acpi_ec_ecdt_start(void);
void acpi_ec_block_transactions(void);
void acpi_ec_unblock_transactions(void);
-void acpi_ec_unblock_transactions_early(void);
int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
acpi_handle handle, acpi_ec_query_func func,
void *data);
@@ -225,4 +224,14 @@
void acpi_init_properties(struct acpi_device *adev);
void acpi_free_properties(struct acpi_device *adev);
+/*--------------------------------------------------------------------------
+ Watchdog
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_WATCHDOG
+void acpi_watchdog_init(void);
+#else
+static inline void acpi_watchdog_init(void) {}
+#endif
+
#endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c
index ccdc8db..6d7ce6e 100644
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -46,7 +46,7 @@
struct resource_win win;
res->flags = 0;
- if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM) == 0)
+ if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM))
return AE_OK;
if (!acpi_dev_resource_memory(acpi_res, res)) {
@@ -97,7 +97,7 @@
unsigned long long gsi_base;
struct acpi_pci_ioapic *ioapic;
struct pci_dev *dev = NULL;
- struct resource *res = NULL;
+ struct resource *res = NULL, *pci_res = NULL, *crs_res;
char *type = NULL;
if (!acpi_is_ioapic(handle, &type))
@@ -137,23 +137,30 @@
pci_set_master(dev);
if (pci_request_region(dev, 0, type))
goto exit_disable;
- res = &dev->resource[0];
+ pci_res = &dev->resource[0];
ioapic->pdev = dev;
} else {
pci_dev_put(dev);
dev = NULL;
-
- res = &ioapic->res;
- acpi_walk_resources(handle, METHOD_NAME__CRS, setup_res, res);
- if (res->flags == 0) {
- acpi_handle_warn(handle, "failed to get resource\n");
- goto exit_free;
- } else if (request_resource(&iomem_resource, res)) {
- acpi_handle_warn(handle, "failed to insert resource\n");
- goto exit_free;
- }
}
+ crs_res = &ioapic->res;
+ acpi_walk_resources(handle, METHOD_NAME__CRS, setup_res, crs_res);
+ crs_res->name = type;
+ crs_res->flags |= IORESOURCE_BUSY;
+ if (crs_res->flags == 0) {
+ acpi_handle_warn(handle, "failed to get resource\n");
+ goto exit_release;
+ } else if (insert_resource(&iomem_resource, crs_res)) {
+ acpi_handle_warn(handle, "failed to insert resource\n");
+ goto exit_release;
+ }
+
+ /* try pci resource first, then "_CRS" resource */
+ res = pci_res;
+ if (!res || !res->flags)
+ res = crs_res;
+
if (acpi_register_ioapic(handle, res->start, (u32)gsi_base)) {
acpi_handle_warn(handle, "failed to register IOAPIC\n");
goto exit_release;
@@ -174,14 +181,13 @@
exit_release:
if (dev)
pci_release_region(dev, 0);
- else
- release_resource(res);
+ if (ioapic->res.flags && ioapic->res.parent)
+ release_resource(&ioapic->res);
exit_disable:
if (dev)
pci_disable_device(dev);
exit_put:
pci_dev_put(dev);
-exit_free:
kfree(ioapic);
exit:
mutex_unlock(&ioapic_list_lock);
@@ -189,13 +195,13 @@
return AE_OK;
}
-int acpi_ioapic_add(struct acpi_pci_root *root)
+int acpi_ioapic_add(acpi_handle root_handle)
{
acpi_status status, retval = AE_OK;
- status = acpi_walk_namespace(ACPI_TYPE_DEVICE, root->device->handle,
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, root_handle,
UINT_MAX, handle_ioapic_add, NULL,
- root->device->handle, (void **)&retval);
+ root_handle, (void **)&retval);
return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
}
@@ -217,9 +223,9 @@
pci_release_region(ioapic->pdev, 0);
pci_disable_device(ioapic->pdev);
pci_dev_put(ioapic->pdev);
- } else if (ioapic->res.flags && ioapic->res.parent) {
- release_resource(&ioapic->res);
}
+ if (ioapic->res.flags && ioapic->res.parent)
+ release_resource(&ioapic->res);
list_del(&ioapic->list);
kfree(ioapic);
}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 80cc7c0..e1d5ea6 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -94,54 +94,50 @@
return to_acpi_device(acpi_desc->dev);
}
-static int xlat_status(void *buf, unsigned int cmd)
+static int xlat_status(void *buf, unsigned int cmd, u32 status)
{
struct nd_cmd_clear_error *clear_err;
struct nd_cmd_ars_status *ars_status;
- struct nd_cmd_ars_start *ars_start;
- struct nd_cmd_ars_cap *ars_cap;
u16 flags;
switch (cmd) {
case ND_CMD_ARS_CAP:
- ars_cap = buf;
- if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+ if ((status & 0xffff) == NFIT_ARS_CAP_NONE)
return -ENOTTY;
/* Command failed */
- if (ars_cap->status & 0xffff)
+ if (status & 0xffff)
return -EIO;
/* No supported scan types for this range */
flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
- if ((ars_cap->status >> 16 & flags) == 0)
+ if ((status >> 16 & flags) == 0)
return -ENOTTY;
break;
case ND_CMD_ARS_START:
- ars_start = buf;
/* ARS is in progress */
- if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+ if ((status & 0xffff) == NFIT_ARS_START_BUSY)
return -EBUSY;
/* Command failed */
- if (ars_start->status & 0xffff)
+ if (status & 0xffff)
return -EIO;
break;
case ND_CMD_ARS_STATUS:
ars_status = buf;
/* Command failed */
- if (ars_status->status & 0xffff)
+ if (status & 0xffff)
return -EIO;
/* Check extended status (Upper two bytes) */
- if (ars_status->status == NFIT_ARS_STATUS_DONE)
+ if (status == NFIT_ARS_STATUS_DONE)
return 0;
/* ARS is in progress */
- if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+ if (status == NFIT_ARS_STATUS_BUSY)
return -EBUSY;
/* No ARS performed for the current boot */
- if (ars_status->status == NFIT_ARS_STATUS_NONE)
+ if (status == NFIT_ARS_STATUS_NONE)
return -EAGAIN;
/*
@@ -149,19 +145,19 @@
* agent wants the scan to stop. If we didn't overflow
* then just continue with the returned results.
*/
- if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+ if (status == NFIT_ARS_STATUS_INTR) {
if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
return -ENOSPC;
return 0;
}
/* Unknown status */
- if (ars_status->status >> 16)
+ if (status >> 16)
return -EIO;
break;
case ND_CMD_CLEAR_ERROR:
clear_err = buf;
- if (clear_err->status & 0xffff)
+ if (status & 0xffff)
return -EIO;
if (!clear_err->cleared)
return -EIO;
@@ -172,6 +168,9 @@
break;
}
+ /* all other non-zero status results in an error */
+ if (status)
+ return -EIO;
return 0;
}
@@ -186,10 +185,10 @@
struct nd_cmd_pkg *call_pkg = NULL;
const char *cmd_name, *dimm_name;
unsigned long cmd_mask, dsm_mask;
+ u32 offset, fw_status = 0;
acpi_handle handle;
unsigned int func;
const u8 *uuid;
- u32 offset;
int rc, i;
func = cmd;
@@ -317,6 +316,15 @@
out_obj->buffer.pointer + offset, out_size);
offset += out_size;
}
+
+ /*
+ * Set fw_status for all the commands with a known format to be
+ * later interpreted by xlat_status().
+ */
+ if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR)
+ || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR)))
+ fw_status = *(u32 *) out_obj->buffer.pointer;
+
if (offset + in_buf.buffer.length < buf_len) {
if (i >= 1) {
/*
@@ -325,7 +333,7 @@
*/
rc = buf_len - offset - in_buf.buffer.length;
if (cmd_rc)
- *cmd_rc = xlat_status(buf, cmd);
+ *cmd_rc = xlat_status(buf, cmd, fw_status);
} else {
dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
__func__, dimm_name, cmd_name, buf_len,
@@ -335,7 +343,7 @@
} else {
rc = 0;
if (cmd_rc)
- *cmd_rc = xlat_status(buf, cmd);
+ *cmd_rc = xlat_status(buf, cmd, fw_status);
}
out:
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 2c45dd3..c576a6f 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -411,7 +411,15 @@
int gsi;
u8 pin;
int triggering = ACPI_LEVEL_SENSITIVE;
- int polarity = ACPI_ACTIVE_LOW;
+ /*
+ * On ARM systems with the GIC interrupt model, level interrupts
+ * are always polarity high by specification; PCI legacy
+ * IRQs lines are inverted before reaching the interrupt
+ * controller and must therefore be considered active high
+ * as default.
+ */
+ int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ?
+ ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW;
char *link = NULL;
char link_desc[16];
int rc;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d144168..bf601d4 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -614,7 +614,17 @@
if (hotadd) {
pcibios_resource_survey_bus(root->bus);
pci_assign_unassigned_root_bus_resources(root->bus);
- acpi_ioapic_add(root);
+ /*
+ * This is only called for the hotadd case. For the boot-time
+ * case, we need to wait until after PCI initialization in
+ * order to deal with IOAPICs mapped in on a PCI BAR.
+ *
+ * This is currently x86-specific, because acpi_ioapic_add()
+ * is an empty function without CONFIG_ACPI_HOTPLUG_IOAPIC.
+ * And CONFIG_ACPI_HOTPLUG_IOAPIC depends on CONFIG_X86_IO_APIC
+ * (see drivers/acpi/Kconfig).
+ */
+ acpi_ioapic_add(root->device->handle);
}
pci_lock_rescan_remove();
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9125d7d..5c78ee1 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,12 +32,12 @@
}
static int map_lapic_id(struct acpi_subtable_header *entry,
- u32 acpi_id, phys_cpuid_t *apic_id)
+ u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
{
struct acpi_madt_local_apic *lapic =
container_of(entry, struct acpi_madt_local_apic, header);
- if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
+ if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (lapic->processor_id != acpi_id)
@@ -48,12 +48,13 @@
}
static int map_x2apic_id(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
+ bool ignore_disabled)
{
struct acpi_madt_local_x2apic *apic =
container_of(entry, struct acpi_madt_local_x2apic, header);
- if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
+ if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (device_declaration && (apic->uid == acpi_id)) {
@@ -65,12 +66,13 @@
}
static int map_lsapic_id(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
+ bool ignore_disabled)
{
struct acpi_madt_local_sapic *lsapic =
container_of(entry, struct acpi_madt_local_sapic, header);
- if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+ if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
return -ENODEV;
if (device_declaration) {
@@ -87,12 +89,13 @@
* Retrieve the ARM CPU physical identifier (MPIDR)
*/
static int map_gicc_mpidr(struct acpi_subtable_header *entry,
- int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
+ int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
+ bool ignore_disabled)
{
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
- if (!(gicc->flags & ACPI_MADT_ENABLED))
+ if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the
@@ -109,7 +112,7 @@
}
static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
- int type, u32 acpi_id)
+ int type, u32 acpi_id, bool ignore_disabled)
{
unsigned long madt_end, entry;
phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */
@@ -127,16 +130,20 @@
struct acpi_subtable_header *header =
(struct acpi_subtable_header *)entry;
if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
- if (!map_lapic_id(header, acpi_id, &phys_id))
+ if (!map_lapic_id(header, acpi_id, &phys_id,
+ ignore_disabled))
break;
} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
- if (!map_x2apic_id(header, type, acpi_id, &phys_id))
+ if (!map_x2apic_id(header, type, acpi_id, &phys_id,
+ ignore_disabled))
break;
} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
- if (!map_lsapic_id(header, type, acpi_id, &phys_id))
+ if (!map_lsapic_id(header, type, acpi_id, &phys_id,
+ ignore_disabled))
break;
} else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
- if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
+ if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
+ ignore_disabled))
break;
}
entry += header->length;
@@ -156,14 +163,15 @@
if (!madt)
return PHYS_CPUID_INVALID;
- rv = map_madt_entry(madt, 1, acpi_id);
+ rv = map_madt_entry(madt, 1, acpi_id, true);
early_acpi_os_unmap_memory(madt, tbl_size);
return rv;
}
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
+ bool ignore_disabled)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
@@ -184,30 +192,38 @@
header = (struct acpi_subtable_header *)obj->buffer.pointer;
if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
- map_lapic_id(header, acpi_id, &phys_id);
+ map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
- map_lsapic_id(header, type, acpi_id, &phys_id);
+ map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
- map_x2apic_id(header, type, acpi_id, &phys_id);
+ map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
- map_gicc_mpidr(header, type, acpi_id, &phys_id);
+ map_gicc_mpidr(header, type, acpi_id, &phys_id,
+ ignore_disabled);
exit:
kfree(buffer.pointer);
return phys_id;
}
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
+ u32 acpi_id, bool ignore_disabled)
{
phys_cpuid_t phys_id;
- phys_id = map_mat_entry(handle, type, acpi_id);
+ phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
if (invalid_phys_cpuid(phys_id))
- phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
+ phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
+ ignore_disabled);
return phys_id;
}
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+{
+ return __acpi_get_phys_id(handle, type, acpi_id, true);
+}
+
int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
{
#ifdef CONFIG_SMP
@@ -264,6 +280,79 @@
}
EXPORT_SYMBOL_GPL(acpi_get_cpuid);
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+static bool __init
+map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
+{
+ int type, id;
+ u32 acpi_id;
+ acpi_status status;
+ acpi_object_type acpi_type;
+ unsigned long long tmp;
+ union acpi_object object = { 0 };
+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+ status = acpi_get_type(handle, &acpi_type);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ switch (acpi_type) {
+ case ACPI_TYPE_PROCESSOR:
+ status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return false;
+ acpi_id = object.processor.proc_id;
+
+ /* validate the acpi_id */
+ if(acpi_processor_validate_proc_id(acpi_id))
+ return false;
+ break;
+ case ACPI_TYPE_DEVICE:
+ status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
+ if (ACPI_FAILURE(status))
+ return false;
+ acpi_id = tmp;
+ break;
+ default:
+ return false;
+ }
+
+ type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
+
+ *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
+ id = acpi_map_cpuid(*phys_id, acpi_id);
+
+ if (id < 0)
+ return false;
+ *cpuid = id;
+ return true;
+}
+
+static acpi_status __init
+set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
+ void **rv)
+{
+ phys_cpuid_t phys_id;
+ int cpu_id;
+
+ if (!map_processor(handle, &phys_id, &cpu_id))
+ return AE_ERROR;
+
+ acpi_map_cpu2node(handle, cpu_id, phys_id);
+ return AE_OK;
+}
+
+void __init acpi_set_processor_mapping(void)
+{
+ /* Set persistent cpu <-> node mapping for all processors. */
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, set_processor_node_mapping,
+ NULL, NULL, NULL);
+}
+#else
+void __init acpi_set_processor_mapping(void) {}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
u64 *phys_addr, int *ioapic_id)
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 0553aee..9d5f0c7 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -110,55 +110,46 @@
static int __acpi_processor_start(struct acpi_device *device);
-static int acpi_cpu_soft_notify(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int acpi_soft_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
struct acpi_processor *pr = per_cpu(processors, cpu);
struct acpi_device *device;
- action &= ~CPU_TASKS_FROZEN;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_DEAD:
- break;
- default:
- return NOTIFY_DONE;
- }
if (!pr || acpi_bus_get_device(pr->handle, &device))
- return NOTIFY_DONE;
+ return 0;
+ /*
+ * CPU got physically hotplugged and onlined for the first time:
+ * Initialize missing things.
+ */
+ if (pr->flags.need_hotplug_init) {
+ int ret;
- if (action == CPU_ONLINE) {
- /*
- * CPU got physically hotplugged and onlined for the first time:
- * Initialize missing things.
- */
- if (pr->flags.need_hotplug_init) {
- int ret;
-
- pr_info("Will online and init hotplugged CPU: %d\n",
- pr->id);
- pr->flags.need_hotplug_init = 0;
- ret = __acpi_processor_start(device);
- WARN(ret, "Failed to start CPU: %d\n", pr->id);
- } else {
- /* Normal CPU soft online event. */
- acpi_processor_ppc_has_changed(pr, 0);
- acpi_processor_hotplug(pr);
- acpi_processor_reevaluate_tstate(pr, action);
- acpi_processor_tstate_has_changed(pr);
- }
- } else if (action == CPU_DEAD) {
- /* Invalidate flag.throttling after the CPU is offline. */
- acpi_processor_reevaluate_tstate(pr, action);
+ pr_info("Will online and init hotplugged CPU: %d\n",
+ pr->id);
+ pr->flags.need_hotplug_init = 0;
+ ret = __acpi_processor_start(device);
+ WARN(ret, "Failed to start CPU: %d\n", pr->id);
+ } else {
+ /* Normal CPU soft online event. */
+ acpi_processor_ppc_has_changed(pr, 0);
+ acpi_processor_hotplug(pr);
+ acpi_processor_reevaluate_tstate(pr, false);
+ acpi_processor_tstate_has_changed(pr);
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block acpi_cpu_notifier = {
- .notifier_call = acpi_cpu_soft_notify,
-};
+static int acpi_soft_cpu_dead(unsigned int cpu)
+{
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+ struct acpi_device *device;
+
+ if (!pr || acpi_bus_get_device(pr->handle, &device))
+ return 0;
+
+ acpi_processor_reevaluate_tstate(pr, true);
+ return 0;
+}
#ifdef CONFIG_ACPI_CPU_FREQ_PSS
static int acpi_pss_perf_init(struct acpi_processor *pr,
@@ -245,8 +236,8 @@
return 0;
result = acpi_cppc_processor_probe(pr);
- if (result)
- return -ENODEV;
+ if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS))
+ dev_warn(&device->dev, "CPPC data invalid or not present\n");
if (!cpuidle_get_driver() || cpuidle_get_driver() == &acpi_idle_driver)
acpi_processor_power_init(pr);
@@ -303,7 +294,7 @@
* This is needed for the powernow-k8 driver, that works even without
* ACPI, but needs symbols from this driver
*/
-
+static enum cpuhp_state hp_online;
static int __init acpi_processor_driver_init(void)
{
int result = 0;
@@ -315,11 +306,22 @@
if (result < 0)
return result;
- register_hotcpu_notifier(&acpi_cpu_notifier);
+ result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "acpi/cpu-drv:online",
+ acpi_soft_cpu_online, NULL);
+ if (result < 0)
+ goto err;
+ hp_online = result;
+ cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead",
+ NULL, acpi_soft_cpu_dead);
+
acpi_thermal_cpufreq_init();
acpi_processor_ppc_init();
acpi_processor_throttling_init();
return 0;
+err:
+ driver_unregister(&acpi_processor_driver);
+ return result;
}
static void __exit acpi_processor_driver_exit(void)
@@ -329,7 +331,8 @@
acpi_processor_ppc_exit();
acpi_thermal_cpufreq_exit();
- unregister_hotcpu_notifier(&acpi_cpu_notifier);
+ cpuhp_remove_state_nocalls(hp_online);
+ cpuhp_remove_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD);
driver_unregister(&acpi_processor_driver);
}
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index c72e648..d51ca1c 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -375,11 +375,11 @@
* 3. TSD domain
*/
void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
- unsigned long action)
+ bool is_dead)
{
int result = 0;
- if (action == CPU_DEAD) {
+ if (is_dead) {
/* When one CPU is offline, the T-state throttling
* will be invalidated.
*/
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e878fc7..035ac64 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2002,6 +2002,7 @@
acpi_pnp_init();
acpi_int340x_thermal_init();
acpi_amba_init();
+ acpi_watchdog_init();
acpi_scan_add_handler(&generic_device_handler);
@@ -2044,6 +2045,7 @@
}
acpi_update_all_gpes();
+ acpi_ec_ecdt_start();
acpi_scan_initialized = true;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2b38c1b..deb0ff7 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -572,7 +572,7 @@
acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status);
- if (pwr_btn_status & ACPI_EVENT_FLAG_SET) {
+ if (pwr_btn_status & ACPI_EVENT_FLAG_STATUS_SET) {
acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
/* Flag for later */
pwr_btn_event_pending = true;
@@ -586,7 +586,7 @@
*/
acpi_disable_all_gpes();
/* Allow EC transactions to happen. */
- acpi_ec_unblock_transactions_early();
+ acpi_ec_unblock_transactions();
suspend_nvs_restore();
@@ -784,7 +784,7 @@
/* Restore the NVS memory area */
suspend_nvs_restore();
/* Allow EC transactions to happen. */
- acpi_ec_unblock_transactions_early();
+ acpi_ec_unblock_transactions();
}
static void acpi_pm_thaw(void)
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
new file mode 100644
index 0000000..e8d7bc7
--- /dev/null
+++ b/drivers/acpi/spcr.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2012, Intel Corporation
+ * Copyright (c) 2015, Red Hat, Inc.
+ * Copyright (c) 2015, 2016 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "ACPI: SPCR: " fmt
+
+#include <linux/acpi.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/serial_core.h>
+
+/**
+ * parse_spcr() - parse ACPI SPCR table and add preferred console
+ *
+ * @earlycon: set up earlycon for the console specified by the table
+ *
+ * For the architectures with support for ACPI, CONFIG_ACPI_SPCR_TABLE may be
+ * defined to parse ACPI SPCR table. As a result of the parsing preferred
+ * console is registered and if @earlycon is true, earlycon is set up.
+ *
+ * When CONFIG_ACPI_SPCR_TABLE is defined, this function should be called
+ * from arch inintialization code as soon as the DT/ACPI decision is made.
+ *
+ */
+int __init parse_spcr(bool earlycon)
+{
+ static char opts[64];
+ struct acpi_table_spcr *table;
+ acpi_size table_size;
+ acpi_status status;
+ char *uart;
+ char *iotype;
+ int baud_rate;
+ int err;
+
+ if (acpi_disabled)
+ return -ENODEV;
+
+ status = acpi_get_table_with_size(ACPI_SIG_SPCR, 0,
+ (struct acpi_table_header **)&table,
+ &table_size);
+
+ if (ACPI_FAILURE(status))
+ return -ENOENT;
+
+ if (table->header.revision < 2) {
+ err = -ENOENT;
+ pr_err("wrong table version\n");
+ goto done;
+ }
+
+ iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
+ "mmio" : "io";
+
+ switch (table->interface_type) {
+ case ACPI_DBG2_ARM_SBSA_32BIT:
+ iotype = "mmio32";
+ /* fall through */
+ case ACPI_DBG2_ARM_PL011:
+ case ACPI_DBG2_ARM_SBSA_GENERIC:
+ case ACPI_DBG2_BCM2835:
+ uart = "pl011";
+ break;
+ case ACPI_DBG2_16550_COMPATIBLE:
+ case ACPI_DBG2_16550_SUBSET:
+ uart = "uart";
+ break;
+ default:
+ err = -ENOENT;
+ goto done;
+ }
+
+ switch (table->baud_rate) {
+ case 3:
+ baud_rate = 9600;
+ break;
+ case 4:
+ baud_rate = 19200;
+ break;
+ case 6:
+ baud_rate = 57600;
+ break;
+ case 7:
+ baud_rate = 115200;
+ break;
+ default:
+ err = -ENOENT;
+ goto done;
+ }
+
+ snprintf(opts, sizeof(opts), "%s,%s,0x%llx,%d", uart, iotype,
+ table->serial_port.address, baud_rate);
+
+ pr_info("console: %s\n", opts);
+
+ if (earlycon)
+ setup_earlycon(opts);
+
+ err = add_preferred_console(uart, 0, opts + strlen(uart) + 1);
+
+done:
+ early_acpi_os_unmap_memory((void __iomem *)table, table_size);
+ return err;
+}
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 358165e..703c26e 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -314,10 +314,14 @@
static struct kobject *dynamic_tables_kobj;
static struct kobject *hotplug_kobj;
+#define ACPI_MAX_TABLE_INSTANCES 999
+#define ACPI_INST_SIZE 4 /* including trailing 0 */
+
struct acpi_table_attr {
struct bin_attribute attr;
- char name[8];
+ char name[ACPI_NAME_SIZE];
int instance;
+ char filename[ACPI_NAME_SIZE+ACPI_INST_SIZE];
struct list_head node;
};
@@ -329,14 +333,9 @@
container_of(bin_attr, struct acpi_table_attr, attr);
struct acpi_table_header *table_header = NULL;
acpi_status status;
- char name[ACPI_NAME_SIZE];
- if (strncmp(table_attr->name, "NULL", 4))
- memcpy(name, table_attr->name, ACPI_NAME_SIZE);
- else
- memcpy(name, "\0\0\0\0", 4);
-
- status = acpi_get_table(name, table_attr->instance, &table_header);
+ status = acpi_get_table(table_attr->name, table_attr->instance,
+ &table_header);
if (ACPI_FAILURE(status))
return -ENODEV;
@@ -344,38 +343,45 @@
table_header, table_header->length);
}
-static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
- struct acpi_table_header *table_header)
+static int acpi_table_attr_init(struct kobject *tables_obj,
+ struct acpi_table_attr *table_attr,
+ struct acpi_table_header *table_header)
{
struct acpi_table_header *header = NULL;
struct acpi_table_attr *attr = NULL;
+ char instance_str[ACPI_INST_SIZE];
sysfs_attr_init(&table_attr->attr.attr);
- if (table_header->signature[0] != '\0')
- memcpy(table_attr->name, table_header->signature,
- ACPI_NAME_SIZE);
- else
- memcpy(table_attr->name, "NULL", 4);
+ ACPI_MOVE_NAME(table_attr->name, table_header->signature);
list_for_each_entry(attr, &acpi_table_attr_list, node) {
- if (!memcmp(table_attr->name, attr->name, ACPI_NAME_SIZE))
+ if (ACPI_COMPARE_NAME(table_attr->name, attr->name))
if (table_attr->instance < attr->instance)
table_attr->instance = attr->instance;
}
table_attr->instance++;
+ if (table_attr->instance > ACPI_MAX_TABLE_INSTANCES) {
+ pr_warn("%4.4s: too many table instances\n",
+ table_attr->name);
+ return -ERANGE;
+ }
+ ACPI_MOVE_NAME(table_attr->filename, table_header->signature);
+ table_attr->filename[ACPI_NAME_SIZE] = '\0';
if (table_attr->instance > 1 || (table_attr->instance == 1 &&
!acpi_get_table
- (table_header->signature, 2, &header)))
- sprintf(table_attr->name + ACPI_NAME_SIZE, "%d",
- table_attr->instance);
+ (table_header->signature, 2, &header))) {
+ snprintf(instance_str, sizeof(instance_str), "%u",
+ table_attr->instance);
+ strcat(table_attr->filename, instance_str);
+ }
table_attr->attr.size = table_header->length;
table_attr->attr.read = acpi_table_show;
- table_attr->attr.attr.name = table_attr->name;
+ table_attr->attr.attr.name = table_attr->filename;
table_attr->attr.attr.mode = 0400;
- return;
+ return sysfs_create_bin_file(tables_obj, &table_attr->attr);
}
acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context)
@@ -383,21 +389,22 @@
struct acpi_table_attr *table_attr;
switch (event) {
- case ACPI_TABLE_EVENT_LOAD:
+ case ACPI_TABLE_EVENT_INSTALL:
table_attr =
kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL);
if (!table_attr)
return AE_NO_MEMORY;
- acpi_table_attr_init(table_attr, table);
- if (sysfs_create_bin_file(dynamic_tables_kobj,
- &table_attr->attr)) {
+ if (acpi_table_attr_init(dynamic_tables_kobj,
+ table_attr, table)) {
kfree(table_attr);
return AE_ERROR;
- } else
- list_add_tail(&table_attr->node, &acpi_table_attr_list);
+ }
+ list_add_tail(&table_attr->node, &acpi_table_attr_list);
break;
+ case ACPI_TABLE_EVENT_LOAD:
case ACPI_TABLE_EVENT_UNLOAD:
+ case ACPI_TABLE_EVENT_UNINSTALL:
/*
* we do not need to do anything right now
* because the table is not deleted from the
@@ -435,13 +442,12 @@
if (ACPI_FAILURE(status))
continue;
- table_attr = NULL;
table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL);
if (!table_attr)
return -ENOMEM;
- acpi_table_attr_init(table_attr, table_header);
- ret = sysfs_create_bin_file(tables_kobj, &table_attr->attr);
+ ret = acpi_table_attr_init(tables_kobj,
+ table_attr, table_header);
if (ret) {
kfree(table_attr);
return ret;
@@ -597,14 +603,27 @@
if (result)
goto end;
- if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
- size += sprintf(buf + size, " invalid");
- else if (status & ACPI_EVENT_FLAG_ENABLED)
- size += sprintf(buf + size, " enabled");
- else if (status & ACPI_EVENT_FLAG_WAKE_ENABLED)
- size += sprintf(buf + size, " wake_enabled");
+ if (status & ACPI_EVENT_FLAG_ENABLE_SET)
+ size += sprintf(buf + size, " EN");
else
- size += sprintf(buf + size, " disabled");
+ size += sprintf(buf + size, " ");
+ if (status & ACPI_EVENT_FLAG_STATUS_SET)
+ size += sprintf(buf + size, " STS");
+ else
+ size += sprintf(buf + size, " ");
+
+ if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
+ size += sprintf(buf + size, " invalid ");
+ else if (status & ACPI_EVENT_FLAG_ENABLED)
+ size += sprintf(buf + size, " enabled ");
+ else if (status & ACPI_EVENT_FLAG_WAKE_ENABLED)
+ size += sprintf(buf + size, " wake_enabled");
+ else
+ size += sprintf(buf + size, " disabled ");
+ if (status & ACPI_EVENT_FLAG_MASKED)
+ size += sprintf(buf + size, " masked ");
+ else
+ size += sprintf(buf + size, " unmasked");
end:
size += sprintf(buf + size, "\n");
@@ -655,8 +674,12 @@
!(status & ACPI_EVENT_FLAG_ENABLED))
result = acpi_enable_gpe(handle, index);
else if (!strcmp(buf, "clear\n") &&
- (status & ACPI_EVENT_FLAG_SET))
+ (status & ACPI_EVENT_FLAG_STATUS_SET))
result = acpi_clear_gpe(handle, index);
+ else if (!strcmp(buf, "mask\n"))
+ result = acpi_mask_gpe(handle, index, TRUE);
+ else if (!strcmp(buf, "unmask\n"))
+ result = acpi_mask_gpe(handle, index, FALSE);
else if (!kstrtoul(buf, 0, &tmp))
all_counters[index].count = tmp;
else
@@ -664,13 +687,13 @@
} else if (index < num_gpes + ACPI_NUM_FIXED_EVENTS) {
int event = index - num_gpes;
if (!strcmp(buf, "disable\n") &&
- (status & ACPI_EVENT_FLAG_ENABLED))
+ (status & ACPI_EVENT_FLAG_ENABLE_SET))
result = acpi_disable_event(event, ACPI_NOT_ISR);
else if (!strcmp(buf, "enable\n") &&
- !(status & ACPI_EVENT_FLAG_ENABLED))
+ !(status & ACPI_EVENT_FLAG_ENABLE_SET))
result = acpi_enable_event(event, ACPI_NOT_ISR);
else if (!strcmp(buf, "clear\n") &&
- (status & ACPI_EVENT_FLAG_SET))
+ (status & ACPI_EVENT_FLAG_STATUS_SET))
result = acpi_clear_event(event);
else if (!kstrtoul(buf, 0, &tmp))
all_counters[index].count = tmp;
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 9f0ad6e..cdd56c4 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -35,7 +35,6 @@
#include <linux/earlycpio.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
-#include <linux/acpi.h>
#include "internal.h"
#ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -246,6 +245,7 @@
struct acpi_subtable_header *entry;
unsigned long table_end;
int count = 0;
+ int errs = 0;
int i;
if (acpi_disabled)
@@ -278,10 +278,12 @@
if (entry->type != proc[i].id)
continue;
if (!proc[i].handler ||
- proc[i].handler(entry, table_end))
- return -EINVAL;
+ (!errs && proc[i].handler(entry, table_end))) {
+ errs++;
+ continue;
+ }
- proc->count++;
+ proc[i].count++;
break;
}
if (i != proc_num)
@@ -301,11 +303,11 @@
}
if (max_entries && count > max_entries) {
- pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
- id, proc->id, count - max_entries, count);
+ pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n",
+ id, proc->id, count);
}
- return count;
+ return errs ? -EINVAL : count;
}
int __init
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..fdf44ca 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -212,6 +212,16 @@
If you are unsure about this, Say N here.
+config DEBUG_TEST_DRIVER_REMOVE
+ bool "Test driver remove calls during probe"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here if you want the Driver core to test driver remove functions
+ by calling probe, remove, probe. This tests the remove path without
+ having to unbind the driver or unload the driver module.
+
+ If you are unsure about this, say N here.
+
config SYS_HYPERVISOR
bool
default n
diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c
index 2ba4cac..95e3ef8 100644
--- a/drivers/base/attribute_container.c
+++ b/drivers/base/attribute_container.c
@@ -243,7 +243,7 @@
* @dev: The generic device to run the trigger for
* @fn the function to execute for each classdev.
*
- * This funcion is for executing a trigger when you need to know both
+ * This function is for executing a trigger when you need to know both
* the container and the classdev. If you only care about the
* container, then use attribute_container_trigger() instead.
*/
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0a8bdad..ce057a5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -836,11 +836,29 @@
return NULL;
}
+static inline bool live_in_glue_dir(struct kobject *kobj,
+ struct device *dev)
+{
+ if (!kobj || !dev->class ||
+ kobj->kset != &dev->class->p->glue_dirs)
+ return false;
+ return true;
+}
+
+static inline struct kobject *get_glue_dir(struct device *dev)
+{
+ return dev->kobj.parent;
+}
+
+/*
+ * make sure cleaning up dir as the last step, we need to make
+ * sure .release handler of kobject is run with holding the
+ * global lock
+ */
static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
{
/* see if we live in a "glue" directory */
- if (!glue_dir || !dev->class ||
- glue_dir->kset != &dev->class->p->glue_dirs)
+ if (!live_in_glue_dir(glue_dir, dev))
return;
mutex_lock(&gdp_mutex);
@@ -848,11 +866,6 @@
mutex_unlock(&gdp_mutex);
}
-static void cleanup_device_parent(struct device *dev)
-{
- cleanup_glue_dir(dev, dev->kobj.parent);
-}
-
static int device_add_class_symlinks(struct device *dev)
{
struct device_node *of_node = dev_of_node(dev);
@@ -1028,6 +1041,7 @@
struct kobject *kobj;
struct class_interface *class_intf;
int error = -EINVAL;
+ struct kobject *glue_dir = NULL;
dev = get_device(dev);
if (!dev)
@@ -1072,8 +1086,10 @@
/* first, register with generic layer. */
/* we require the name to be set before, and pass NULL */
error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
- if (error)
+ if (error) {
+ glue_dir = get_glue_dir(dev);
goto Error;
+ }
/* notify platform of device entry */
if (platform_notify)
@@ -1154,9 +1170,10 @@
device_remove_file(dev, &dev_attr_uevent);
attrError:
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
+ glue_dir = get_glue_dir(dev);
kobject_del(&dev->kobj);
Error:
- cleanup_device_parent(dev);
+ cleanup_glue_dir(dev, glue_dir);
put_device(parent);
name_error:
kfree(dev->p);
@@ -1232,6 +1249,7 @@
void device_del(struct device *dev)
{
struct device *parent = dev->parent;
+ struct kobject *glue_dir = NULL;
struct class_interface *class_intf;
/* Notify clients of device removal. This call must come
@@ -1266,6 +1284,7 @@
bus_remove_device(dev);
device_pm_remove(dev);
driver_deferred_probe_del(dev);
+ device_remove_properties(dev);
/* Notify the platform of the removal, in case they
* need to do anything...
@@ -1276,8 +1295,9 @@
blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
BUS_NOTIFY_REMOVED_DEVICE, dev);
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
- cleanup_device_parent(dev);
+ glue_dir = get_glue_dir(dev);
kobject_del(&dev->kobj);
+ cleanup_glue_dir(dev, glue_dir);
put_device(parent);
}
EXPORT_SYMBOL_GPL(device_del);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 691eeea..4c28e1a 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -371,12 +371,13 @@
if (cpu->hotpluggable)
cpu->dev.groups = hotplugable_cpu_attr_groups;
error = device_register(&cpu->dev);
- if (!error)
- per_cpu(cpu_sys_devices, num) = &cpu->dev;
- if (!error)
- register_cpu_under_node(num, cpu_to_node(num));
+ if (error)
+ return error;
- return error;
+ per_cpu(cpu_sys_devices, num) = &cpu->dev;
+ register_cpu_under_node(num, cpu_to_node(num));
+
+ return 0;
}
struct device *get_cpu_device(unsigned cpu)
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 16688f5..d22a726 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -51,7 +51,6 @@
static DEFINE_MUTEX(deferred_probe_mutex);
static LIST_HEAD(deferred_probe_pending_list);
static LIST_HEAD(deferred_probe_active_list);
-static struct workqueue_struct *deferred_wq;
static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
/*
@@ -175,7 +174,7 @@
* Kick the re-probe thread. It may already be scheduled, but it is
* safe to kick it again.
*/
- queue_work(deferred_wq, &deferred_probe_work);
+ schedule_work(&deferred_probe_work);
}
/**
@@ -211,14 +210,10 @@
*/
static int deferred_probe_initcall(void)
{
- deferred_wq = create_singlethread_workqueue("deferwq");
- if (WARN_ON(!deferred_wq))
- return -ENOMEM;
-
driver_deferred_probe_enable = true;
driver_deferred_probe_trigger();
/* Sort as many dependencies as possible before exiting initcalls */
- flush_workqueue(deferred_wq);
+ flush_work(&deferred_probe_work);
return 0;
}
late_initcall(deferred_probe_initcall);
@@ -329,6 +324,7 @@
{
int ret = -EPROBE_DEFER;
int local_trigger_count = atomic_read(&deferred_trigger_count);
+ bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE);
if (defer_all_probes) {
/*
@@ -346,6 +342,7 @@
drv->bus->name, __func__, drv->name, dev_name(dev));
WARN_ON(!list_empty(&dev->devres_head));
+re_probe:
dev->driver = drv;
/* If using pinctrl, bind pins now before probing */
@@ -383,6 +380,25 @@
goto probe_failed;
}
+ if (test_remove) {
+ test_remove = false;
+
+ if (dev->bus && dev->bus->remove)
+ dev->bus->remove(dev);
+ else if (drv->remove)
+ drv->remove(dev);
+
+ devres_release_all(dev);
+ driver_sysfs_remove(dev);
+ dev->driver = NULL;
+ dev_set_drvdata(dev, NULL);
+ if (dev->pm_domain && dev->pm_domain->dismiss)
+ dev->pm_domain->dismiss(dev);
+ pm_runtime_reinit(dev);
+
+ goto re_probe;
+ }
+
pinctrl_init_done(dev);
if (dev->pm_domain && dev->pm_domain->sync)
@@ -460,8 +476,7 @@
void wait_for_device_probe(void)
{
/* wait for the deferred probe workqueue to finish */
- if (driver_deferred_probe_enable)
- flush_workqueue(deferred_wq);
+ flush_work(&deferred_probe_work);
/* wait for the known devices to complete their probing */
wait_event(probe_waitqueue, atomic_read(&probe_count) == 0);
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index bdf28f7..640a7e6 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -165,6 +165,7 @@
int order = get_order(size);
unsigned long flags;
int pageno;
+ int dma_memory_map;
if (!dev)
return 0;
@@ -187,11 +188,12 @@
*/
*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
*ret = mem->virt_base + (pageno << PAGE_SHIFT);
- if (mem->flags & DMA_MEMORY_MAP)
+ dma_memory_map = (mem->flags & DMA_MEMORY_MAP);
+ spin_unlock_irqrestore(&mem->spinlock, flags);
+ if (dma_memory_map)
memset(*ret, 0, size);
else
memset_io(*ret, 0, size);
- spin_unlock_irqrestore(&mem->spinlock, flags);
return 1;
@@ -261,8 +263,8 @@
(mem->virt_base + (mem->size << PAGE_SHIFT))) {
unsigned long off = vma->vm_pgoff;
int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
- int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- int count = size >> PAGE_SHIFT;
+ int user_count = vma_pages(vma);
+ int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
*ret = -ENXIO;
if (off < count && user_count <= count - off) {
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index d799662..8f8b68c 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -198,10 +198,13 @@
rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
flags);
- if (rc == 0)
+ if (rc) {
devres_add(dev, res);
- else
+ rc = 0;
+ } else {
devres_free(res);
+ rc = -ENOMEM;
+ }
return rc;
}
@@ -247,7 +250,7 @@
{
int ret = -ENXIO;
#if defined(CONFIG_MMU) && !defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP)
- unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
unsigned long off = vma->vm_pgoff;
@@ -334,7 +337,7 @@
return;
}
- unmap_kernel_range((unsigned long)cpu_addr, size);
+ unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
vunmap(cpu_addr);
}
#endif
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 279e539..be6a599 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -142,13 +142,12 @@
}
for (i = 0; i < nvec; i++) {
- desc = alloc_msi_entry(dev);
+ desc = alloc_msi_entry(dev, 1, NULL);
if (!desc)
break;
desc->platform.msi_priv_data = data;
desc->platform.msi_index = base + i;
- desc->nvec_used = 1;
desc->irq = virq ? virq + i : 0;
list_add_tail(&desc->list, dev_to_msi_list(dev));
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 6482d47..c4af003 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -97,7 +97,7 @@
int ret;
ret = of_irq_get(dev->dev.of_node, num);
- if (ret >= 0 || ret == -EPROBE_DEFER)
+ if (ret > 0 || ret == -EPROBE_DEFER)
return ret;
}
@@ -108,9 +108,14 @@
* IORESOURCE_BITS correspond 1-to-1 to the IRQF_TRIGGER*
* settings.
*/
- if (r && r->flags & IORESOURCE_BITS)
- irqd_set_trigger_type(irq_get_irq_data(r->start),
- r->flags & IORESOURCE_BITS);
+ if (r && r->flags & IORESOURCE_BITS) {
+ struct irq_data *irqd;
+
+ irqd = irq_get_irq_data(r->start);
+ if (!irqd)
+ return -ENXIO;
+ irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS);
+ }
return r ? r->start : -ENXIO;
#endif
@@ -175,7 +180,7 @@
int ret;
ret = of_irq_get_byname(dev->dev.of_node, name);
- if (ret >= 0 || ret == -EPROBE_DEFER)
+ if (ret > 0 || ret == -EPROBE_DEFER)
return ret;
}
@@ -434,6 +439,7 @@
int i;
if (pdev) {
+ device_remove_properties(&pdev->dev);
device_del(&pdev->dev);
if (pdev->id_auto) {
@@ -446,8 +452,6 @@
if (r->parent)
release_resource(r);
}
-
- device_remove_properties(&pdev->dev);
}
}
EXPORT_SYMBOL_GPL(platform_device_del);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index a1f2aff..e023066 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -45,7 +45,7 @@
* and checks that the PM domain pointer is a real generic PM domain.
* Any failure results in NULL being returned.
*/
-struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev)
+static struct generic_pm_domain *genpd_lookup_dev(struct device *dev)
{
struct generic_pm_domain *genpd = NULL, *gpd;
@@ -586,7 +586,7 @@
}
late_initcall(genpd_poweroff_unused);
-#ifdef CONFIG_PM_SLEEP
+#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_GENERIC_DOMAINS_OF)
/**
* pm_genpd_present - Check if the given PM domain has been initialized.
@@ -606,6 +606,10 @@
return false;
}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+
static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
struct device *dev)
{
@@ -613,9 +617,8 @@
}
/**
- * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
+ * genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
* @genpd: PM domain to power off, if possible.
- * @timed: True if latency measurements are allowed.
*
* Check if the given PM domain can be powered off (during system suspend or
* hibernation) and do that if so. Also, in that case propagate to its masters.
@@ -625,8 +628,7 @@
* executed sequentially, so it is guaranteed that it will never run twice in
* parallel).
*/
-static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd,
- bool timed)
+static void genpd_sync_poweroff(struct generic_pm_domain *genpd)
{
struct gpd_link *link;
@@ -639,28 +641,26 @@
/* Choose the deepest state when suspending */
genpd->state_idx = genpd->state_count - 1;
- genpd_power_off(genpd, timed);
+ genpd_power_off(genpd, false);
genpd->status = GPD_STATE_POWER_OFF;
list_for_each_entry(link, &genpd->slave_links, slave_node) {
genpd_sd_counter_dec(link->master);
- pm_genpd_sync_poweroff(link->master, timed);
+ genpd_sync_poweroff(link->master);
}
}
/**
- * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters.
+ * genpd_sync_poweron - Synchronously power on a PM domain and its masters.
* @genpd: PM domain to power on.
- * @timed: True if latency measurements are allowed.
*
* This function is only called in "noirq" and "syscore" stages of system power
* transitions, so it need not acquire locks (all of the "noirq" callbacks are
* executed sequentially, so it is guaranteed that it will never run twice in
* parallel).
*/
-static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd,
- bool timed)
+static void genpd_sync_poweron(struct generic_pm_domain *genpd)
{
struct gpd_link *link;
@@ -668,11 +668,11 @@
return;
list_for_each_entry(link, &genpd->slave_links, slave_node) {
- pm_genpd_sync_poweron(link->master, timed);
+ genpd_sync_poweron(link->master);
genpd_sd_counter_inc(link->master);
}
- genpd_power_on(genpd, timed);
+ genpd_power_on(genpd, false);
genpd->status = GPD_STATE_ACTIVE;
}
@@ -784,7 +784,7 @@
* the same PM domain, so it is not necessary to use locking here.
*/
genpd->suspended_count++;
- pm_genpd_sync_poweroff(genpd, true);
+ genpd_sync_poweroff(genpd);
return 0;
}
@@ -814,7 +814,7 @@
* guaranteed that this function will never run twice in parallel for
* the same PM domain, so it is not necessary to use locking here.
*/
- pm_genpd_sync_poweron(genpd, true);
+ genpd_sync_poweron(genpd);
genpd->suspended_count--;
if (genpd->dev_ops.stop && genpd->dev_ops.start)
@@ -902,12 +902,12 @@
if (genpd->suspended_count++ == 0)
/*
* The boot kernel might put the domain into arbitrary state,
- * so make it appear as powered off to pm_genpd_sync_poweron(),
+ * so make it appear as powered off to genpd_sync_poweron(),
* so that it tries to power it on in case it was really off.
*/
genpd->status = GPD_STATE_POWER_OFF;
- pm_genpd_sync_poweron(genpd, true);
+ genpd_sync_poweron(genpd);
if (genpd->dev_ops.stop && genpd->dev_ops.start)
ret = pm_runtime_force_resume(dev);
@@ -962,9 +962,9 @@
if (suspend) {
genpd->suspended_count++;
- pm_genpd_sync_poweroff(genpd, false);
+ genpd_sync_poweroff(genpd);
} else {
- pm_genpd_sync_poweron(genpd, false);
+ genpd_sync_poweron(genpd);
genpd->suspended_count--;
}
}
@@ -1056,14 +1056,8 @@
dev_pm_put_subsys_data(dev);
}
-/**
- * __pm_genpd_add_device - Add a device to an I/O PM domain.
- * @genpd: PM domain to add the device to.
- * @dev: Device to be added.
- * @td: Set of PM QoS timing parameters to attach to the device.
- */
-int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
- struct gpd_timing_data *td)
+static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+ struct gpd_timing_data *td)
{
struct generic_pm_domain_data *gpd_data;
int ret = 0;
@@ -1103,15 +1097,28 @@
return ret;
}
-EXPORT_SYMBOL_GPL(__pm_genpd_add_device);
/**
- * pm_genpd_remove_device - Remove a device from an I/O PM domain.
- * @genpd: PM domain to remove the device from.
- * @dev: Device to be removed.
+ * __pm_genpd_add_device - Add a device to an I/O PM domain.
+ * @genpd: PM domain to add the device to.
+ * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters to attach to the device.
*/
-int pm_genpd_remove_device(struct generic_pm_domain *genpd,
- struct device *dev)
+int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+ struct gpd_timing_data *td)
+{
+ int ret;
+
+ mutex_lock(&gpd_list_lock);
+ ret = genpd_add_device(genpd, dev, td);
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__pm_genpd_add_device);
+
+static int genpd_remove_device(struct generic_pm_domain *genpd,
+ struct device *dev)
{
struct generic_pm_domain_data *gpd_data;
struct pm_domain_data *pdd;
@@ -1119,10 +1126,6 @@
dev_dbg(dev, "%s()\n", __func__);
- if (!genpd || genpd != pm_genpd_lookup_dev(dev))
- return -EINVAL;
-
- /* The above validation also means we have existing domain_data. */
pdd = dev->power.subsys_data->domain_data;
gpd_data = to_gpd_data(pdd);
dev_pm_qos_remove_notifier(dev, &gpd_data->nb);
@@ -1154,15 +1157,24 @@
return ret;
}
-EXPORT_SYMBOL_GPL(pm_genpd_remove_device);
/**
- * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
- * @genpd: Master PM domain to add the subdomain to.
- * @subdomain: Subdomain to be added.
+ * pm_genpd_remove_device - Remove a device from an I/O PM domain.
+ * @genpd: PM domain to remove the device from.
+ * @dev: Device to be removed.
*/
-int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
- struct generic_pm_domain *subdomain)
+int pm_genpd_remove_device(struct generic_pm_domain *genpd,
+ struct device *dev)
+{
+ if (!genpd || genpd != genpd_lookup_dev(dev))
+ return -EINVAL;
+
+ return genpd_remove_device(genpd, dev);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_remove_device);
+
+static int genpd_add_subdomain(struct generic_pm_domain *genpd,
+ struct generic_pm_domain *subdomain)
{
struct gpd_link *link, *itr;
int ret = 0;
@@ -1205,6 +1217,23 @@
kfree(link);
return ret;
}
+
+/**
+ * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
+ * @genpd: Master PM domain to add the subdomain to.
+ * @subdomain: Subdomain to be added.
+ */
+int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
+ struct generic_pm_domain *subdomain)
+{
+ int ret;
+
+ mutex_lock(&gpd_list_lock);
+ ret = genpd_add_subdomain(genpd, subdomain);
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain);
/**
@@ -1278,27 +1307,17 @@
genpd->device_count = 0;
genpd->max_off_time_ns = -1;
genpd->max_off_time_changed = true;
+ genpd->provider = NULL;
+ genpd->has_provider = false;
genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
genpd->domain.ops.runtime_resume = genpd_runtime_resume;
genpd->domain.ops.prepare = pm_genpd_prepare;
- genpd->domain.ops.suspend = pm_generic_suspend;
- genpd->domain.ops.suspend_late = pm_generic_suspend_late;
genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq;
genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq;
- genpd->domain.ops.resume_early = pm_generic_resume_early;
- genpd->domain.ops.resume = pm_generic_resume;
- genpd->domain.ops.freeze = pm_generic_freeze;
- genpd->domain.ops.freeze_late = pm_generic_freeze_late;
genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
- genpd->domain.ops.thaw_early = pm_generic_thaw_early;
- genpd->domain.ops.thaw = pm_generic_thaw;
- genpd->domain.ops.poweroff = pm_generic_poweroff;
- genpd->domain.ops.poweroff_late = pm_generic_poweroff_late;
genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
- genpd->domain.ops.restore_early = pm_generic_restore_early;
- genpd->domain.ops.restore = pm_generic_restore;
genpd->domain.ops.complete = pm_genpd_complete;
if (genpd->flags & GENPD_FLAG_PM_CLK) {
@@ -1328,7 +1347,71 @@
}
EXPORT_SYMBOL_GPL(pm_genpd_init);
+static int genpd_remove(struct generic_pm_domain *genpd)
+{
+ struct gpd_link *l, *link;
+
+ if (IS_ERR_OR_NULL(genpd))
+ return -EINVAL;
+
+ mutex_lock(&genpd->lock);
+
+ if (genpd->has_provider) {
+ mutex_unlock(&genpd->lock);
+ pr_err("Provider present, unable to remove %s\n", genpd->name);
+ return -EBUSY;
+ }
+
+ if (!list_empty(&genpd->master_links) || genpd->device_count) {
+ mutex_unlock(&genpd->lock);
+ pr_err("%s: unable to remove %s\n", __func__, genpd->name);
+ return -EBUSY;
+ }
+
+ list_for_each_entry_safe(link, l, &genpd->slave_links, slave_node) {
+ list_del(&link->master_node);
+ list_del(&link->slave_node);
+ kfree(link);
+ }
+
+ list_del(&genpd->gpd_list_node);
+ mutex_unlock(&genpd->lock);
+ cancel_work_sync(&genpd->power_off_work);
+ pr_debug("%s: removed %s\n", __func__, genpd->name);
+
+ return 0;
+}
+
+/**
+ * pm_genpd_remove - Remove a generic I/O PM domain
+ * @genpd: Pointer to PM domain that is to be removed.
+ *
+ * To remove the PM domain, this function:
+ * - Removes the PM domain as a subdomain to any parent domains,
+ * if it was added.
+ * - Removes the PM domain from the list of registered PM domains.
+ *
+ * The PM domain will only be removed, if the associated provider has
+ * been removed, it is not a parent to any other PM domain and has no
+ * devices associated with it.
+ */
+int pm_genpd_remove(struct generic_pm_domain *genpd)
+{
+ int ret;
+
+ mutex_lock(&gpd_list_lock);
+ ret = genpd_remove(genpd);
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pm_genpd_remove);
+
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+
+typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
+ void *data);
+
/*
* Device Tree based PM domain providers.
*
@@ -1340,8 +1423,8 @@
* maps a PM domain specifier retrieved from the device tree to a PM domain.
*
* Two simple mapping functions have been provided for convenience:
- * - __of_genpd_xlate_simple() for 1:1 device tree node to PM domain mapping.
- * - __of_genpd_xlate_onecell() for mapping of multiple PM domains per node by
+ * - genpd_xlate_simple() for 1:1 device tree node to PM domain mapping.
+ * - genpd_xlate_onecell() for mapping of multiple PM domains per node by
* index.
*/
@@ -1366,7 +1449,7 @@
static DEFINE_MUTEX(of_genpd_mutex);
/**
- * __of_genpd_xlate_simple() - Xlate function for direct node-domain mapping
+ * genpd_xlate_simple() - Xlate function for direct node-domain mapping
* @genpdspec: OF phandle args to map into a PM domain
* @data: xlate function private data - pointer to struct generic_pm_domain
*
@@ -1374,7 +1457,7 @@
* have their own device tree nodes. The private data of xlate function needs
* to be a valid pointer to struct generic_pm_domain.
*/
-struct generic_pm_domain *__of_genpd_xlate_simple(
+static struct generic_pm_domain *genpd_xlate_simple(
struct of_phandle_args *genpdspec,
void *data)
{
@@ -1382,10 +1465,9 @@
return ERR_PTR(-EINVAL);
return data;
}
-EXPORT_SYMBOL_GPL(__of_genpd_xlate_simple);
/**
- * __of_genpd_xlate_onecell() - Xlate function using a single index.
+ * genpd_xlate_onecell() - Xlate function using a single index.
* @genpdspec: OF phandle args to map into a PM domain
* @data: xlate function private data - pointer to struct genpd_onecell_data
*
@@ -1394,7 +1476,7 @@
* A single cell is used as an index into an array of PM domains specified in
* the genpd_onecell_data struct when registering the provider.
*/
-struct generic_pm_domain *__of_genpd_xlate_onecell(
+static struct generic_pm_domain *genpd_xlate_onecell(
struct of_phandle_args *genpdspec,
void *data)
{
@@ -1414,16 +1496,15 @@
return genpd_data->domains[idx];
}
-EXPORT_SYMBOL_GPL(__of_genpd_xlate_onecell);
/**
- * __of_genpd_add_provider() - Register a PM domain provider for a node
+ * genpd_add_provider() - Register a PM domain provider for a node
* @np: Device node pointer associated with the PM domain provider.
* @xlate: Callback for decoding PM domain from phandle arguments.
* @data: Context pointer for @xlate callback.
*/
-int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
- void *data)
+static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
+ void *data)
{
struct of_genpd_provider *cp;
@@ -1442,7 +1523,83 @@
return 0;
}
-EXPORT_SYMBOL_GPL(__of_genpd_add_provider);
+
+/**
+ * of_genpd_add_provider_simple() - Register a simple PM domain provider
+ * @np: Device node pointer associated with the PM domain provider.
+ * @genpd: Pointer to PM domain associated with the PM domain provider.
+ */
+int of_genpd_add_provider_simple(struct device_node *np,
+ struct generic_pm_domain *genpd)
+{
+ int ret = -EINVAL;
+
+ if (!np || !genpd)
+ return -EINVAL;
+
+ mutex_lock(&gpd_list_lock);
+
+ if (pm_genpd_present(genpd))
+ ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
+
+ if (!ret) {
+ genpd->provider = &np->fwnode;
+ genpd->has_provider = true;
+ }
+
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple);
+
+/**
+ * of_genpd_add_provider_onecell() - Register a onecell PM domain provider
+ * @np: Device node pointer associated with the PM domain provider.
+ * @data: Pointer to the data associated with the PM domain provider.
+ */
+int of_genpd_add_provider_onecell(struct device_node *np,
+ struct genpd_onecell_data *data)
+{
+ unsigned int i;
+ int ret = -EINVAL;
+
+ if (!np || !data)
+ return -EINVAL;
+
+ mutex_lock(&gpd_list_lock);
+
+ for (i = 0; i < data->num_domains; i++) {
+ if (!data->domains[i])
+ continue;
+ if (!pm_genpd_present(data->domains[i]))
+ goto error;
+
+ data->domains[i]->provider = &np->fwnode;
+ data->domains[i]->has_provider = true;
+ }
+
+ ret = genpd_add_provider(np, genpd_xlate_onecell, data);
+ if (ret < 0)
+ goto error;
+
+ mutex_unlock(&gpd_list_lock);
+
+ return 0;
+
+error:
+ while (i--) {
+ if (!data->domains[i])
+ continue;
+ data->domains[i]->provider = NULL;
+ data->domains[i]->has_provider = false;
+ }
+
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell);
/**
* of_genpd_del_provider() - Remove a previously registered PM domain provider
@@ -1451,10 +1608,21 @@
void of_genpd_del_provider(struct device_node *np)
{
struct of_genpd_provider *cp;
+ struct generic_pm_domain *gpd;
+ mutex_lock(&gpd_list_lock);
mutex_lock(&of_genpd_mutex);
list_for_each_entry(cp, &of_genpd_providers, link) {
if (cp->node == np) {
+ /*
+ * For each PM domain associated with the
+ * provider, set the 'has_provider' to false
+ * so that the PM domain can be safely removed.
+ */
+ list_for_each_entry(gpd, &gpd_list, gpd_list_node)
+ if (gpd->provider == &np->fwnode)
+ gpd->has_provider = false;
+
list_del(&cp->link);
of_node_put(cp->node);
kfree(cp);
@@ -1462,11 +1630,12 @@
}
}
mutex_unlock(&of_genpd_mutex);
+ mutex_unlock(&gpd_list_lock);
}
EXPORT_SYMBOL_GPL(of_genpd_del_provider);
/**
- * of_genpd_get_from_provider() - Look-up PM domain
+ * genpd_get_from_provider() - Look-up PM domain
* @genpdspec: OF phandle args to use for look-up
*
* Looks for a PM domain provider under the node specified by @genpdspec and if
@@ -1476,7 +1645,7 @@
* Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR()
* on failure.
*/
-struct generic_pm_domain *of_genpd_get_from_provider(
+static struct generic_pm_domain *genpd_get_from_provider(
struct of_phandle_args *genpdspec)
{
struct generic_pm_domain *genpd = ERR_PTR(-ENOENT);
@@ -1499,7 +1668,109 @@
return genpd;
}
-EXPORT_SYMBOL_GPL(of_genpd_get_from_provider);
+
+/**
+ * of_genpd_add_device() - Add a device to an I/O PM domain
+ * @genpdspec: OF phandle args to use for look-up PM domain
+ * @dev: Device to be added.
+ *
+ * Looks-up an I/O PM domain based upon phandle args provided and adds
+ * the device to the PM domain. Returns a negative error code on failure.
+ */
+int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
+{
+ struct generic_pm_domain *genpd;
+ int ret;
+
+ mutex_lock(&gpd_list_lock);
+
+ genpd = genpd_get_from_provider(genpdspec);
+ if (IS_ERR(genpd)) {
+ ret = PTR_ERR(genpd);
+ goto out;
+ }
+
+ ret = genpd_add_device(genpd, dev, NULL);
+
+out:
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_device);
+
+/**
+ * of_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
+ * @parent_spec: OF phandle args to use for parent PM domain look-up
+ * @subdomain_spec: OF phandle args to use for subdomain look-up
+ *
+ * Looks-up a parent PM domain and subdomain based upon phandle args
+ * provided and adds the subdomain to the parent PM domain. Returns a
+ * negative error code on failure.
+ */
+int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
+ struct of_phandle_args *subdomain_spec)
+{
+ struct generic_pm_domain *parent, *subdomain;
+ int ret;
+
+ mutex_lock(&gpd_list_lock);
+
+ parent = genpd_get_from_provider(parent_spec);
+ if (IS_ERR(parent)) {
+ ret = PTR_ERR(parent);
+ goto out;
+ }
+
+ subdomain = genpd_get_from_provider(subdomain_spec);
+ if (IS_ERR(subdomain)) {
+ ret = PTR_ERR(subdomain);
+ goto out;
+ }
+
+ ret = genpd_add_subdomain(parent, subdomain);
+
+out:
+ mutex_unlock(&gpd_list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_subdomain);
+
+/**
+ * of_genpd_remove_last - Remove the last PM domain registered for a provider
+ * @provider: Pointer to device structure associated with provider
+ *
+ * Find the last PM domain that was added by a particular provider and
+ * remove this PM domain from the list of PM domains. The provider is
+ * identified by the 'provider' device structure that is passed. The PM
+ * domain will only be removed, if the provider associated with domain
+ * has been removed.
+ *
+ * Returns a valid pointer to struct generic_pm_domain on success or
+ * ERR_PTR() on failure.
+ */
+struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
+{
+ struct generic_pm_domain *gpd, *genpd = ERR_PTR(-ENOENT);
+ int ret;
+
+ if (IS_ERR_OR_NULL(np))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&gpd_list_lock);
+ list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+ if (gpd->provider == &np->fwnode) {
+ ret = genpd_remove(gpd);
+ genpd = ret ? ERR_PTR(ret) : gpd;
+ break;
+ }
+ }
+ mutex_unlock(&gpd_list_lock);
+
+ return genpd;
+}
+EXPORT_SYMBOL_GPL(of_genpd_remove_last);
/**
* genpd_dev_pm_detach - Detach a device from its PM domain.
@@ -1515,14 +1786,14 @@
unsigned int i;
int ret = 0;
- pd = pm_genpd_lookup_dev(dev);
- if (!pd)
+ pd = dev_to_genpd(dev);
+ if (IS_ERR(pd))
return;
dev_dbg(dev, "removing from PM domain %s\n", pd->name);
for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
- ret = pm_genpd_remove_device(pd, dev);
+ ret = genpd_remove_device(pd, dev);
if (ret != -EAGAIN)
break;
@@ -1596,9 +1867,11 @@
return -ENOENT;
}
- pd = of_genpd_get_from_provider(&pd_args);
+ mutex_lock(&gpd_list_lock);
+ pd = genpd_get_from_provider(&pd_args);
of_node_put(pd_args.np);
if (IS_ERR(pd)) {
+ mutex_unlock(&gpd_list_lock);
dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
__func__, PTR_ERR(pd));
return -EPROBE_DEFER;
@@ -1607,13 +1880,14 @@
dev_dbg(dev, "adding to PM domain %s\n", pd->name);
for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
- ret = pm_genpd_add_device(pd, dev);
+ ret = genpd_add_device(pd, dev, NULL);
if (ret != -EAGAIN)
break;
mdelay(i);
cond_resched();
}
+ mutex_unlock(&gpd_list_lock);
if (ret < 0) {
dev_err(dev, "failed to add to PM domain %s: %d",
@@ -1636,7 +1910,7 @@
/*** debugfs support ***/
-#ifdef CONFIG_PM_ADVANCED_DEBUG
+#ifdef CONFIG_DEBUG_FS
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/debugfs.h>
@@ -1784,4 +2058,4 @@
debugfs_remove_recursive(pm_genpd_debugfs_dir);
}
__exitcall(pm_genpd_debug_exit);
-#endif /* CONFIG_PM_ADVANCED_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index df0c709..4c7c6da 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -584,7 +584,6 @@
struct clk *clk;
unsigned long freq, old_freq;
unsigned long u_volt, u_volt_min, u_volt_max;
- unsigned long ou_volt, ou_volt_min, ou_volt_max;
int ret;
if (unlikely(!target_freq)) {
@@ -620,11 +619,7 @@
}
old_opp = _find_freq_ceil(opp_table, &old_freq);
- if (!IS_ERR(old_opp)) {
- ou_volt = old_opp->u_volt;
- ou_volt_min = old_opp->u_volt_min;
- ou_volt_max = old_opp->u_volt_max;
- } else {
+ if (IS_ERR(old_opp)) {
dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n",
__func__, old_freq, PTR_ERR(old_opp));
}
@@ -683,7 +678,8 @@
restore_voltage:
/* This shouldn't harm even if the voltages weren't updated earlier */
if (!IS_ERR(old_opp))
- _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max);
+ _set_opp_voltage(dev, reg, old_opp->u_volt,
+ old_opp->u_volt_min, old_opp->u_volt_max);
return ret;
}
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 1dfd3dd..5552211 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -71,8 +71,18 @@
u32 version;
int ret;
- if (!opp_table->supported_hw)
- return true;
+ if (!opp_table->supported_hw) {
+ /*
+ * In the case that no supported_hw has been set by the
+ * platform but there is an opp-supported-hw value set for
+ * an OPP then the OPP should not be enabled as there is
+ * no way to see if the hardware supports it.
+ */
+ if (of_find_property(np, "opp-supported-hw", NULL))
+ return false;
+ else
+ return true;
+ }
while (count--) {
ret = of_property_read_u32_index(np, "opp-supported-hw", count,
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 25d26bb..e964d06 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1475,7 +1475,11 @@
kfree(buf);
} else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
- regcache_drop_region(map, reg, reg + 1);
+ /* regcache_drop_region() takes lock that we already have,
+ * thus call map->cache_ops->drop() directly
+ */
+ if (map->cache_ops && map->cache_ops->drop)
+ map->cache_ops->drop(map, reg, reg + 1);
}
trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 75b98aa..b63f23e 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -6,7 +6,6 @@
*/
#include <linux/sysfs.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/slab.h>
@@ -160,11 +159,3 @@
return bus_register(&soc_bus_type);
}
core_initcall(soc_bus_register);
-
-static void __exit soc_bus_unregister(void)
-{
- ida_destroy(&soc_ida);
-
- bus_unregister(&soc_bus_type);
-}
-module_exit(soc_bus_unregister);
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 3ff229b..c4a75a1 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -377,21 +377,7 @@
.fops = &vhci_fops,
.minor = VHCI_MINOR,
};
-
-static int __init vhci_init(void)
-{
- BT_INFO("Virtual HCI driver ver %s", VERSION);
-
- return misc_register(&vhci_miscdev);
-}
-
-static void __exit vhci_exit(void)
-{
- misc_deregister(&vhci_miscdev);
-}
-
-module_init(vhci_init);
-module_exit(vhci_exit);
+module_misc_device(vhci_miscdev);
module_param(amp, bool, 0644);
MODULE_PARM_DESC(amp, "Create AMP controller device");
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index ffa7c9d..8900823 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -144,15 +144,12 @@
int num_cntrs;
atomic_t active_events;
struct mutex reserve_mutex;
- struct list_head entry;
+ struct hlist_node node;
cpumask_t cpus;
};
#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu))
-static DEFINE_MUTEX(cci_pmu_mutex);
-static LIST_HEAD(cci_pmu_list);
-
enum cci_models {
#ifdef CONFIG_ARM_CCI400_PMU
CCI400_R0,
@@ -1506,25 +1503,21 @@
return perf_pmu_register(&cci_pmu->pmu, name, -1);
}
-static int cci_pmu_offline_cpu(unsigned int cpu)
+static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct cci_pmu *cci_pmu;
+ struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node);
unsigned int target;
- mutex_lock(&cci_pmu_mutex);
- list_for_each_entry(cci_pmu, &cci_pmu_list, entry) {
- if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
- continue;
- target = cpumask_any_but(cpu_online_mask, cpu);
- if (target >= nr_cpu_ids)
- continue;
- /*
- * TODO: migrate context once core races on event->ctx have
- * been fixed.
- */
- cpumask_set_cpu(target, &cci_pmu->cpus);
- }
- mutex_unlock(&cci_pmu_mutex);
+ if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
+ return 0;
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+ /*
+ * TODO: migrate context once core races on event->ctx have
+ * been fixed.
+ */
+ cpumask_set_cpu(target, &cci_pmu->cpus);
return 0;
}
@@ -1768,10 +1761,8 @@
if (ret)
return ret;
- mutex_lock(&cci_pmu_mutex);
- list_add(&cci_pmu->entry, &cci_pmu_list);
- mutex_unlock(&cci_pmu_mutex);
-
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+ &cci_pmu->node);
pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
return 0;
}
@@ -1804,9 +1795,9 @@
{
int ret;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
- "AP_PERF_ARM_CCI_ONLINE", NULL,
- cci_pmu_offline_cpu);
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+ "AP_PERF_ARM_CCI_ONLINE", NULL,
+ cci_pmu_offline_cpu);
if (ret)
return ret;
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 884c030..d1074d9 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -167,7 +167,7 @@
struct hrtimer hrtimer;
cpumask_t cpu;
- struct list_head entry;
+ struct hlist_node node;
struct pmu pmu;
};
@@ -190,9 +190,6 @@
int mn_id;
};
-static DEFINE_MUTEX(arm_ccn_mutex);
-static LIST_HEAD(arm_ccn_list);
-
static int arm_ccn_node_to_xp(int node)
{
return node / CCN_NUM_XP_PORTS;
@@ -1214,30 +1211,24 @@
}
-static int arm_ccn_pmu_offline_cpu(unsigned int cpu)
+static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct arm_ccn_dt *dt;
+ struct arm_ccn_dt *dt = hlist_entry_safe(node, struct arm_ccn_dt, node);
+ struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
unsigned int target;
- mutex_lock(&arm_ccn_mutex);
- list_for_each_entry(dt, &arm_ccn_list, entry) {
- struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
-
- if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
- continue;
- target = cpumask_any_but(cpu_online_mask, cpu);
- if (target >= nr_cpu_ids)
- continue;
- perf_pmu_migrate_context(&dt->pmu, cpu, target);
- cpumask_set_cpu(target, &dt->cpu);
- if (ccn->irq)
- WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
- }
- mutex_unlock(&arm_ccn_mutex);
+ if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
+ return 0;
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+ perf_pmu_migrate_context(&dt->pmu, cpu, target);
+ cpumask_set_cpu(target, &dt->cpu);
+ if (ccn->irq)
+ WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
return 0;
}
-
static DEFINE_IDA(arm_ccn_pmu_ida);
static int arm_ccn_pmu_init(struct arm_ccn *ccn)
@@ -1321,9 +1312,8 @@
if (err)
goto error_pmu_register;
- mutex_lock(&arm_ccn_mutex);
- list_add(&ccn->dt.entry, &arm_ccn_list);
- mutex_unlock(&arm_ccn_mutex);
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+ &ccn->dt.node);
return 0;
error_pmu_register:
@@ -1339,10 +1329,8 @@
{
int i;
- mutex_lock(&arm_ccn_mutex);
- list_del(&ccn->dt.entry);
- mutex_unlock(&arm_ccn_mutex);
-
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+ &ccn->dt.node);
if (ccn->irq)
irq_set_affinity_hint(ccn->irq, NULL);
for (i = 0; i < ccn->num_xps; i++)
@@ -1573,9 +1561,9 @@
{
int i, ret;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
- "AP_PERF_ARM_CCN_ONLINE", NULL,
- arm_ccn_pmu_offline_cpu);
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+ "AP_PERF_ARM_CCN_ONLINE", NULL,
+ arm_ccn_pmu_offline_cpu);
if (ret)
return ret;
@@ -1587,7 +1575,7 @@
static void __exit arm_ccn_exit(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
platform_driver_unregister(&arm_ccn_driver);
}
diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c
index cad49bc..1b14256 100644
--- a/drivers/bus/mips_cdmm.c
+++ b/drivers/bus/mips_cdmm.c
@@ -596,19 +596,20 @@
BUILD_PERDEV_HELPER(cpu_up) /* int mips_cdmm_cpu_up_helper(...) */
/**
- * mips_cdmm_bus_down() - Tear down the CDMM bus.
- * @data: Pointer to unsigned int CPU number.
+ * mips_cdmm_cpu_down_prep() - Callback for CPUHP DOWN_PREP:
+ * Tear down the CDMM bus.
+ * @cpu: unsigned int CPU number.
*
* This function is executed on the hotplugged CPU and calls the CDMM
* driver cpu_down callback for all devices on that CPU.
*/
-static long mips_cdmm_bus_down(void *data)
+static int mips_cdmm_cpu_down_prep(unsigned int cpu)
{
struct mips_cdmm_bus *bus;
long ret;
/* Inform all the devices on the bus */
- ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, data,
+ ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, &cpu,
mips_cdmm_cpu_down_helper);
/*
@@ -623,8 +624,8 @@
}
/**
- * mips_cdmm_bus_up() - Bring up the CDMM bus.
- * @data: Pointer to unsigned int CPU number.
+ * mips_cdmm_cpu_online() - Callback for CPUHP ONLINE: Bring up the CDMM bus.
+ * @cpu: unsigned int CPU number.
*
* This work_on_cpu callback function is executed on a given CPU to discover
* CDMM devices on that CPU, or to call the CDMM driver cpu_up callback for all
@@ -634,7 +635,7 @@
* initialisation. When CPUs are brought online the function is
* invoked directly on the hotplugged CPU.
*/
-static long mips_cdmm_bus_up(void *data)
+static int mips_cdmm_cpu_online(unsigned int cpu)
{
struct mips_cdmm_bus *bus;
long ret;
@@ -651,51 +652,13 @@
mips_cdmm_bus_discover(bus);
else
/* Inform all the devices on the bus */
- ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, data,
+ ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, &cpu,
mips_cdmm_cpu_up_helper);
return ret;
}
/**
- * mips_cdmm_cpu_notify() - Take action when a CPU is going online or offline.
- * @nb: CPU notifier block .
- * @action: Event that has taken place (CPU_*).
- * @data: CPU number.
- *
- * This notifier is used to keep the CDMM buses updated as CPUs are offlined and
- * onlined. When CPUs go offline or come back online, so does their CDMM bus, so
- * devices must be informed. Also when CPUs come online for the first time the
- * devices on the CDMM bus need discovering.
- *
- * Returns: NOTIFY_OK if event was used.
- * NOTIFY_DONE if we didn't care.
- */
-static int mips_cdmm_cpu_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- unsigned int cpu = (unsigned int)data;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- mips_cdmm_bus_up(&cpu);
- break;
- case CPU_DOWN_PREPARE:
- mips_cdmm_bus_down(&cpu);
- break;
- default:
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block mips_cdmm_cpu_nb = {
- .notifier_call = mips_cdmm_cpu_notify,
-};
-
-/**
* mips_cdmm_init() - Initialise CDMM bus.
*
* Initialise CDMM bus, discover CDMM devices for online CPUs, and arrange for
@@ -703,7 +666,6 @@
*/
static int __init mips_cdmm_init(void)
{
- unsigned int cpu;
int ret;
/* Register the bus */
@@ -712,19 +674,11 @@
return ret;
/* We want to be notified about new CPUs */
- ret = register_cpu_notifier(&mips_cdmm_cpu_nb);
- if (ret) {
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "bus/cdmm:online",
+ mips_cdmm_cpu_online, mips_cdmm_cpu_down_prep);
+ if (ret < 0)
pr_warn("cdmm: Failed to register CPU notifier\n");
- goto out;
- }
- /* Discover devices on CDMM of online CPUs */
- for_each_online_cpu(cpu)
- work_on_cpu(cpu, mips_cdmm_bus_up, &cpu);
-
- return 0;
-out:
- bus_unregister(&mips_cdmm_bustype);
return ret;
}
subsys_initcall(mips_cdmm_init);
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c
index 44660f1..35d46da 100644
--- a/drivers/char/bfin-otp.c
+++ b/drivers/char/bfin-otp.c
@@ -230,45 +230,7 @@
.name = DRIVER_NAME,
.fops = &bfin_otp_fops,
};
-
-/**
- * bfin_otp_init - Initialize module
- *
- * Registers the device and notifier handler. Actual device
- * initialization is handled by bfin_otp_open().
- */
-static int __init bfin_otp_init(void)
-{
- int ret;
-
- stampit();
-
- ret = misc_register(&bfin_otp_misc_device);
- if (ret) {
- pr_init(KERN_ERR PFX "unable to register a misc device\n");
- return ret;
- }
-
- pr_init(KERN_INFO PFX "initialized\n");
-
- return 0;
-}
-
-/**
- * bfin_otp_exit - Deinitialize module
- *
- * Unregisters the device and notifier handler. Actual device
- * deinitialization is handled by bfin_otp_close().
- */
-static void __exit bfin_otp_exit(void)
-{
- stampit();
-
- misc_deregister(&bfin_otp_misc_device);
-}
-
-module_init(bfin_otp_init);
-module_exit(bfin_otp_exit);
+module_misc_device(bfin_otp_misc_device);
MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
MODULE_DESCRIPTION("Blackfin OTP Memory Interface");
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index a33163d..5bb1985 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -381,6 +381,9 @@
char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
int err = 0;
+ if (!pfn_valid(PFN_DOWN(p)))
+ return -EIO;
+
read = 0;
if (p < (unsigned long) high_memory) {
low_count = count;
@@ -509,6 +512,9 @@
char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
int err = 0;
+ if (!pfn_valid(PFN_DOWN(p)))
+ return -EIO;
+
if (p < (unsigned long) high_memory) {
unsigned long to_write = min_t(unsigned long, count,
(unsigned long)high_memory - p);
diff --git a/drivers/char/mwave/3780i.c b/drivers/char/mwave/3780i.c
index 2874004..972c40a1 100644
--- a/drivers/char/mwave/3780i.c
+++ b/drivers/char/mwave/3780i.c
@@ -124,7 +124,7 @@
MKBYTE(rSlaveControl));
rSlaveControl_Save = rSlaveControl;
- rSlaveControl.ConfigMode = TRUE;
+ rSlaveControl.ConfigMode = true;
PRINTK_2(TRACE_3780I,
"3780i::dsp3780i_WriteGenCfg entry rSlaveControl+ConfigMode %x\n",
@@ -155,7 +155,7 @@
MKBYTE(rSlaveControl) = InByteDsp(DSP_IsaSlaveControl);
rSlaveControl_Save = rSlaveControl;
- rSlaveControl.ConfigMode = TRUE;
+ rSlaveControl.ConfigMode = true;
OutByteDsp(DSP_IsaSlaveControl, MKBYTE(rSlaveControl));
OutByteDsp(DSP_ConfigAddress, (unsigned char) uIndex);
ucValue = InByteDsp(DSP_ConfigData);
@@ -230,7 +230,7 @@
rUartCfg1.BaseIO = 3;
break;
}
- rUartCfg2.Enable = TRUE;
+ rUartCfg2.Enable = true;
}
rHBridgeCfg1.Reserved = rHBridgeCfg2.Reserved = 0;
@@ -238,7 +238,7 @@
rHBridgeCfg1.IrqPulse = pSettings->bDspIrqPulse;
rHBridgeCfg1.Irq = (unsigned char) pIrqMap[pSettings->usDspIrq];
rHBridgeCfg1.AccessMode = 1;
- rHBridgeCfg2.Enable = TRUE;
+ rHBridgeCfg2.Enable = true;
rBusmasterCfg2.Reserved = 0;
@@ -278,8 +278,8 @@
* soft-reset active for 10ms.
*/
rSlaveControl.ClockControl = 0;
- rSlaveControl.SoftReset = TRUE;
- rSlaveControl.ConfigMode = FALSE;
+ rSlaveControl.SoftReset = true;
+ rSlaveControl.ConfigMode = false;
rSlaveControl.Reserved = 0;
PRINTK_4(TRACE_3780I,
@@ -302,7 +302,7 @@
for (i = 0; i < 11; i++)
udelay(2000);
- rSlaveControl.SoftReset = FALSE;
+ rSlaveControl.SoftReset = false;
OutWordDsp(DSP_IsaSlaveControl, MKWORD(rSlaveControl));
MKWORD(tval) = InWordDsp(DSP_IsaSlaveControl);
@@ -326,10 +326,10 @@
}
- rHBridgeControl.EnableDspInt = FALSE;
- rHBridgeControl.MemAutoInc = TRUE;
- rHBridgeControl.IoAutoInc = FALSE;
- rHBridgeControl.DiagnosticMode = FALSE;
+ rHBridgeControl.EnableDspInt = false;
+ rHBridgeControl.MemAutoInc = true;
+ rHBridgeControl.IoAutoInc = false;
+ rHBridgeControl.DiagnosticMode = false;
PRINTK_3(TRACE_3780I,
"3780i::dsp3780i_EnableDSP DSP_HBridgeControl %x rHBridgeControl %x\n",
@@ -345,7 +345,7 @@
ChipID = ReadMsaCfg(DSP_ChipID);
PRINTK_2(TRACE_3780I,
- "3780i::dsp3780I_EnableDSP exiting bRC=TRUE, ChipID %x\n",
+ "3780i::dsp3780I_EnableDSP exiting bRC=true, ChipID %x\n",
ChipID);
return 0;
@@ -361,8 +361,8 @@
PRINTK_1(TRACE_3780I, "3780i::dsp3780i_DisableDSP entry\n");
rSlaveControl.ClockControl = 0;
- rSlaveControl.SoftReset = TRUE;
- rSlaveControl.ConfigMode = FALSE;
+ rSlaveControl.SoftReset = true;
+ rSlaveControl.ConfigMode = false;
rSlaveControl.Reserved = 0;
spin_lock_irqsave(&dsp_lock, flags);
OutWordDsp(DSP_IsaSlaveControl, MKWORD(rSlaveControl));
@@ -398,14 +398,14 @@
PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Reset rHBridgeControl %x\n",
MKWORD(rHBridgeControl));
- rHBridgeControl.EnableDspInt = FALSE;
+ rHBridgeControl.EnableDspInt = false;
OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
spin_unlock_irqrestore(&dsp_lock, flags);
/* Reset the core via the boot domain register */
- rBootDomain.ResetCore = TRUE;
- rBootDomain.Halt = TRUE;
- rBootDomain.NMI = TRUE;
+ rBootDomain.ResetCore = true;
+ rBootDomain.Halt = true;
+ rBootDomain.NMI = true;
rBootDomain.Reserved = 0;
PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Reset rBootDomain %x\n",
@@ -438,26 +438,26 @@
/* Transition the core to a running state */
- rBootDomain.ResetCore = TRUE;
- rBootDomain.Halt = FALSE;
- rBootDomain.NMI = TRUE;
+ rBootDomain.ResetCore = true;
+ rBootDomain.Halt = false;
+ rBootDomain.NMI = true;
rBootDomain.Reserved = 0;
WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
udelay(5);
- rBootDomain.ResetCore = FALSE;
+ rBootDomain.ResetCore = false;
WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
udelay(5);
- rBootDomain.NMI = FALSE;
+ rBootDomain.NMI = false;
WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
udelay(5);
/* Enable DSP to PC interrupt */
spin_lock_irqsave(&dsp_lock, flags);
MKWORD(rHBridgeControl) = InWordDsp(DSP_HBridgeControl);
- rHBridgeControl.EnableDspInt = TRUE;
+ rHBridgeControl.EnableDspInt = true;
PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Run rHBridgeControl %x\n",
MKWORD(rHBridgeControl));
@@ -466,7 +466,7 @@
spin_unlock_irqrestore(&dsp_lock, flags);
- PRINTK_1(TRACE_3780I, "3780i::dsp3780i_Run exit bRC=TRUE\n");
+ PRINTK_1(TRACE_3780I, "3780i::dsp3780i_Run exit bRC=true\n");
return 0;
}
@@ -508,7 +508,7 @@
PRINTK_1(TRACE_3780I,
- "3780I::dsp3780I_ReadDStore exit bRC=TRUE\n");
+ "3780I::dsp3780I_ReadDStore exit bRC=true\n");
return 0;
}
@@ -550,7 +550,7 @@
PRINTK_1(TRACE_3780I,
- "3780I::dsp3780I_ReadAndClearDStore exit bRC=TRUE\n");
+ "3780I::dsp3780I_ReadAndClearDStore exit bRC=true\n");
return 0;
}
@@ -592,7 +592,7 @@
PRINTK_1(TRACE_3780I,
- "3780I::dsp3780D_WriteDStore exit bRC=TRUE\n");
+ "3780I::dsp3780D_WriteDStore exit bRC=true\n");
return 0;
}
@@ -640,7 +640,7 @@
}
PRINTK_1(TRACE_3780I,
- "3780I::dsp3780I_ReadIStore exit bRC=TRUE\n");
+ "3780I::dsp3780I_ReadIStore exit bRC=true\n");
return 0;
}
@@ -689,7 +689,7 @@
}
PRINTK_1(TRACE_3780I,
- "3780I::dsp3780I_WriteIStore exit bRC=TRUE\n");
+ "3780I::dsp3780I_WriteIStore exit bRC=true\n");
return 0;
}
@@ -713,7 +713,7 @@
*/
spin_lock_irqsave(&dsp_lock, flags);
MKWORD(rHBridgeControl) = InWordDsp(DSP_HBridgeControl);
- rHBridgeControl.EnableDspInt = FALSE;
+ rHBridgeControl.EnableDspInt = false;
OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
*pusIPCSource = InWordDsp(DSP_Interrupt);
@@ -725,7 +725,7 @@
OutWordDsp(DSP_Interrupt, (unsigned short) ~(*pusIPCSource));
- rHBridgeControl.EnableDspInt = TRUE;
+ rHBridgeControl.EnableDspInt = true;
OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
spin_unlock_irqrestore(&dsp_lock, flags);
diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h
index fba6ab1..9ccb6b2 100644
--- a/drivers/char/mwave/3780i.h
+++ b/drivers/char/mwave/3780i.h
@@ -101,7 +101,7 @@
} DSP_UART_CFG_1;
typedef struct {
- unsigned char Enable:1; /* RW: Enable I/O and IRQ: 0=FALSE, 1=TRUE */
+ unsigned char Enable:1; /* RW: Enable I/O and IRQ: 0=false, 1=true */
unsigned char Reserved:7; /* 0: Reserved */
} DSP_UART_CFG_2;
@@ -114,7 +114,7 @@
} DSP_HBRIDGE_CFG_1;
typedef struct {
- unsigned char Enable:1; /* RW: enable I/O and IRQ: 0=FALSE, 1=TRUE */
+ unsigned char Enable:1; /* RW: enable I/O and IRQ: 0=false, 1=true */
unsigned char Reserved:7; /* 0: Reserved */
} DSP_HBRIDGE_CFG_2;
@@ -133,12 +133,12 @@
typedef struct {
- unsigned char GateIOCHRDY:1; /* RW: Enable IOCHRDY gating: 0=FALSE, 1=TRUE */
+ unsigned char GateIOCHRDY:1; /* RW: Enable IOCHRDY gating: 0=false, 1=true */
unsigned char Reserved:7; /* 0: Reserved */
} DSP_ISA_PROT_CFG;
typedef struct {
- unsigned char Enable:1; /* RW: Enable low power suspend/resume 0=FALSE, 1=TRUE */
+ unsigned char Enable:1; /* RW: Enable low power suspend/resume 0=false, 1=true */
unsigned char Reserved:7; /* 0: Reserved */
} DSP_POWER_MGMT_CFG;
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 164544a..3a3ff2e 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -296,8 +296,8 @@
pDrvData->IPCs[ipcnum].usIntCount);
mutex_lock(&mwave_mutex);
- pDrvData->IPCs[ipcnum].bIsHere = FALSE;
- pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+ pDrvData->IPCs[ipcnum].bIsHere = false;
+ pDrvData->IPCs[ipcnum].bIsEnabled = true;
mutex_unlock(&mwave_mutex);
PRINTK_2(TRACE_MWAVE,
@@ -324,7 +324,7 @@
pDrvData->IPCs[ipcnum].usIntCount);
mutex_lock(&mwave_mutex);
- if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
+ if (pDrvData->IPCs[ipcnum].bIsEnabled == true) {
DECLARE_WAITQUEUE(wait, current);
PRINTK_2(TRACE_MWAVE,
@@ -332,7 +332,7 @@
" ipc %x going to sleep\n",
ipcnum);
add_wait_queue(&pDrvData->IPCs[ipcnum].ipc_wait_queue, &wait);
- pDrvData->IPCs[ipcnum].bIsHere = TRUE;
+ pDrvData->IPCs[ipcnum].bIsHere = true;
set_current_state(TASK_INTERRUPTIBLE);
/* check whether an event was signalled by */
/* the interrupt handler while we were gone */
@@ -355,7 +355,7 @@
" application\n",
ipcnum);
}
- pDrvData->IPCs[ipcnum].bIsHere = FALSE;
+ pDrvData->IPCs[ipcnum].bIsHere = false;
remove_wait_queue(&pDrvData->IPCs[ipcnum].ipc_wait_queue, &wait);
set_current_state(TASK_RUNNING);
PRINTK_2(TRACE_MWAVE,
@@ -384,9 +384,9 @@
return -EINVAL;
}
mutex_lock(&mwave_mutex);
- if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
- pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
- if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
+ if (pDrvData->IPCs[ipcnum].bIsEnabled == true) {
+ pDrvData->IPCs[ipcnum].bIsEnabled = false;
+ if (pDrvData->IPCs[ipcnum].bIsHere == true) {
wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
}
}
@@ -541,7 +541,7 @@
if (pDrvData->device_registered) {
device_unregister(&mwave_device);
- pDrvData->device_registered = FALSE;
+ pDrvData->device_registered = false;
}
#endif
@@ -576,16 +576,16 @@
memset(&mwave_s_mdd, 0, sizeof(MWAVE_DEVICE_DATA));
- pDrvData->bBDInitialized = FALSE;
- pDrvData->bResourcesClaimed = FALSE;
- pDrvData->bDSPEnabled = FALSE;
- pDrvData->bDSPReset = FALSE;
- pDrvData->bMwaveDevRegistered = FALSE;
+ pDrvData->bBDInitialized = false;
+ pDrvData->bResourcesClaimed = false;
+ pDrvData->bDSPEnabled = false;
+ pDrvData->bDSPReset = false;
+ pDrvData->bMwaveDevRegistered = false;
pDrvData->sLine = -1;
for (i = 0; i < ARRAY_SIZE(pDrvData->IPCs); i++) {
- pDrvData->IPCs[i].bIsEnabled = FALSE;
- pDrvData->IPCs[i].bIsHere = FALSE;
+ pDrvData->IPCs[i].bIsEnabled = false;
+ pDrvData->IPCs[i].bIsHere = false;
pDrvData->IPCs[i].usIntCount = 0; /* no ints received yet */
init_waitqueue_head(&pDrvData->IPCs[i].ipc_wait_queue);
}
@@ -601,7 +601,7 @@
" Failed to initialize board data\n");
goto cleanup_error;
}
- pDrvData->bBDInitialized = TRUE;
+ pDrvData->bBDInitialized = true;
retval = tp3780I_CalcResources(&pDrvData->rBDData);
PRINTK_2(TRACE_MWAVE,
@@ -626,7 +626,7 @@
" Failed to claim resources\n");
goto cleanup_error;
}
- pDrvData->bResourcesClaimed = TRUE;
+ pDrvData->bResourcesClaimed = true;
retval = tp3780I_EnableDSP(&pDrvData->rBDData);
PRINTK_2(TRACE_MWAVE,
@@ -639,7 +639,7 @@
" Failed to enable DSP\n");
goto cleanup_error;
}
- pDrvData->bDSPEnabled = TRUE;
+ pDrvData->bDSPEnabled = true;
if (misc_register(&mwave_misc_dev) < 0) {
PRINTK_ERROR(KERN_ERR_MWAVE
@@ -647,7 +647,7 @@
" Failed to register misc device\n");
goto cleanup_error;
}
- pDrvData->bMwaveDevRegistered = TRUE;
+ pDrvData->bMwaveDevRegistered = true;
pDrvData->sLine = register_serial_portandirq(
pDrvData->rBDData.rDspSettings.usUartBaseIO,
@@ -668,7 +668,7 @@
if (device_register(&mwave_device))
goto cleanup_error;
- pDrvData->device_registered = TRUE;
+ pDrvData->device_registered = true;
for (i = 0; i < ARRAY_SIZE(mwave_dev_attrs); i++) {
if(device_create_file(&mwave_device, mwave_dev_attrs[i])) {
PRINTK_ERROR(KERN_ERR_MWAVE
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 7e0d530..37e0a49 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -125,8 +125,8 @@
typedef struct _MWAVE_IPC {
unsigned short usIntCount; /* 0=none, 1=first, 2=greater than 1st */
- BOOLEAN bIsEnabled;
- BOOLEAN bIsHere;
+ bool bIsEnabled;
+ bool bIsHere;
/* entry spin lock */
wait_queue_head_t ipc_wait_queue;
} MWAVE_IPC;
@@ -135,12 +135,12 @@
THINKPAD_BD_DATA rBDData; /* board driver's data area */
unsigned long ulIPCSource_ISR; /* IPC source bits for recently processed intr, set during ISR processing */
unsigned long ulIPCSource_DPC; /* IPC source bits for recently processed intr, set during DPC processing */
- BOOLEAN bBDInitialized;
- BOOLEAN bResourcesClaimed;
- BOOLEAN bDSPEnabled;
- BOOLEAN bDSPReset;
+ bool bBDInitialized;
+ bool bResourcesClaimed;
+ bool bDSPEnabled;
+ bool bDSPReset;
MWAVE_IPC IPCs[16];
- BOOLEAN bMwaveDevRegistered;
+ bool bMwaveDevRegistered;
short sLine;
int nr_registered_attrs;
int device_registered;
diff --git a/drivers/char/mwave/smapi.c b/drivers/char/mwave/smapi.c
index 6187fd1..8c5411a 100644
--- a/drivers/char/mwave/smapi.c
+++ b/drivers/char/mwave/smapi.c
@@ -493,7 +493,7 @@
}
-int smapi_set_DSP_power_state(BOOLEAN bOn)
+int smapi_set_DSP_power_state(bool bOn)
{
int bRC = -EIO;
unsigned short usAX, usBX, usCX, usDX, usDI, usSI;
@@ -556,7 +556,7 @@
PRINTK_ERROR("smapi::smapi_init, ERROR unable to read from SMAPI port\n");
} else {
PRINTK_2(TRACE_SMAPI,
- "smapi::smapi_init, exit TRUE g_usSmapiPort %x\n",
+ "smapi::smapi_init, exit true g_usSmapiPort %x\n",
g_usSmapiPort);
retval = 0;
//SmapiQuerySystemID();
diff --git a/drivers/char/mwave/smapi.h b/drivers/char/mwave/smapi.h
index 64b2ec1..ebc206b 100644
--- a/drivers/char/mwave/smapi.h
+++ b/drivers/char/mwave/smapi.h
@@ -49,10 +49,6 @@
#ifndef _LINUX_SMAPI_H
#define _LINUX_SMAPI_H
-#define TRUE 1
-#define FALSE 0
-#define BOOLEAN int
-
typedef struct {
int bDSPPresent;
int bDSPEnabled;
@@ -74,7 +70,7 @@
int smapi_init(void);
int smapi_query_DSP_cfg(SMAPI_DSP_SETTINGS * pSettings);
int smapi_set_DSP_cfg(void);
-int smapi_set_DSP_power_state(BOOLEAN bOn);
+int smapi_set_DSP_power_state(bool bOn);
#endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index 04e6d6a..5e1618a 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -80,13 +80,13 @@
WriteMsaCfg(DSP_GpioModeControl_15_8, MKWORD(rGpioMode));
MKWORD(rGpioDriverEnable) = 0;
- rGpioDriverEnable.Enable10 = TRUE;
- rGpioDriverEnable.Mask10 = TRUE;
+ rGpioDriverEnable.Enable10 = true;
+ rGpioDriverEnable.Mask10 = true;
WriteMsaCfg(DSP_GpioDriverEnable_15_8, MKWORD(rGpioDriverEnable));
MKWORD(rGpioOutputData) = 0;
rGpioOutputData.Latch10 = 0;
- rGpioOutputData.Mask10 = TRUE;
+ rGpioOutputData.Mask10 = true;
WriteMsaCfg(DSP_GpioOutputData_15_8, MKWORD(rGpioOutputData));
PRINTK_1(TRACE_TP3780I, "tp3780i::EnableSRAM exit\n");
@@ -127,7 +127,7 @@
PRINTK_2(TRACE_TP3780I,
"tp3780i::DspInterrupt usIntCount %x\n",
pDrvData->IPCs[usPCNum - 1].usIntCount);
- if (pDrvData->IPCs[usPCNum - 1].bIsEnabled == TRUE) {
+ if (pDrvData->IPCs[usPCNum - 1].bIsEnabled == true) {
PRINTK_2(TRACE_TP3780I,
"tp3780i::DspInterrupt, waking up usPCNum %x\n",
usPCNum - 1);
@@ -160,8 +160,8 @@
PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_InitializeBoardData entry pBDData %p\n", pBDData);
- pBDData->bDSPEnabled = FALSE;
- pSettings->bInterruptClaimed = FALSE;
+ pBDData->bDSPEnabled = false;
+ pSettings->bInterruptClaimed = false;
retval = smapi_init();
if (retval) {
@@ -269,7 +269,7 @@
if (pSettings->bInterruptClaimed) {
free_irq(pSettings->usDspIrq, NULL);
- pSettings->bInterruptClaimed = FALSE;
+ pSettings->bInterruptClaimed = false;
}
PRINTK_2(TRACE_TP3780I,
@@ -283,7 +283,7 @@
int tp3780I_EnableDSP(THINKPAD_BD_DATA * pBDData)
{
DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
- BOOLEAN bDSPPoweredUp = FALSE, bInterruptAllocated = FALSE;
+ bool bDSPPoweredUp = false, bInterruptAllocated = false;
PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_EnableDSP entry pBDData %p\n", pBDData);
@@ -336,14 +336,14 @@
}
}
- pSettings->bDspIrqActiveLow = pSettings->bDspIrqPulse = TRUE;
- pSettings->bUartIrqActiveLow = pSettings->bUartIrqPulse = TRUE;
+ pSettings->bDspIrqActiveLow = pSettings->bDspIrqPulse = true;
+ pSettings->bUartIrqActiveLow = pSettings->bUartIrqPulse = true;
if (pBDData->bShareDspIrq) {
- pSettings->bDspIrqActiveLow = FALSE;
+ pSettings->bDspIrqActiveLow = false;
}
if (pBDData->bShareUartIrq) {
- pSettings->bUartIrqActiveLow = FALSE;
+ pSettings->bUartIrqActiveLow = false;
}
pSettings->usNumTransfers = TP_CFG_NumTransfers;
@@ -373,16 +373,16 @@
PRINTK_3(TRACE_TP3780I,
"tp3780i::tp3780I_EnableDSP, got interrupt %x bShareDspIrq %x\n",
pSettings->usDspIrq, pBDData->bShareDspIrq);
- bInterruptAllocated = TRUE;
- pSettings->bInterruptClaimed = TRUE;
+ bInterruptAllocated = true;
+ pSettings->bInterruptClaimed = true;
}
- smapi_set_DSP_power_state(FALSE);
- if (smapi_set_DSP_power_state(TRUE)) {
- PRINTK_ERROR(KERN_ERR_MWAVE "tp3780i::tp3780I_EnableDSP: Error: smapi_set_DSP_power_state(TRUE) failed\n");
+ smapi_set_DSP_power_state(false);
+ if (smapi_set_DSP_power_state(true)) {
+ PRINTK_ERROR(KERN_ERR_MWAVE "tp3780i::tp3780I_EnableDSP: Error: smapi_set_DSP_power_state(true) failed\n");
goto exit_cleanup;
} else {
- bDSPPoweredUp = TRUE;
+ bDSPPoweredUp = true;
}
if (dsp3780I_EnableDSP(pSettings, s_ausThinkpadIrqToField, s_ausThinkpadDmaToField)) {
@@ -392,7 +392,7 @@
EnableSRAM(pBDData);
- pBDData->bDSPEnabled = TRUE;
+ pBDData->bDSPEnabled = true;
PRINTK_1(TRACE_TP3780I, "tp3780i::tp3780I_EnableDSP exit\n");
@@ -401,10 +401,10 @@
exit_cleanup:
PRINTK_ERROR("tp3780i::tp3780I_EnableDSP: Cleaning up\n");
if (bDSPPoweredUp)
- smapi_set_DSP_power_state(FALSE);
+ smapi_set_DSP_power_state(false);
if (bInterruptAllocated) {
free_irq(pSettings->usDspIrq, NULL);
- pSettings->bInterruptClaimed = FALSE;
+ pSettings->bInterruptClaimed = false;
}
return -EIO;
}
@@ -421,10 +421,10 @@
dsp3780I_DisableDSP(&pBDData->rDspSettings);
if (pSettings->bInterruptClaimed) {
free_irq(pSettings->usDspIrq, NULL);
- pSettings->bInterruptClaimed = FALSE;
+ pSettings->bInterruptClaimed = false;
}
- smapi_set_DSP_power_state(FALSE);
- pBDData->bDSPEnabled = FALSE;
+ smapi_set_DSP_power_state(false);
+ pBDData->bDSPEnabled = false;
}
PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_DisableDSP exit retval %x\n", retval);
@@ -516,7 +516,7 @@
int retval = 0;
DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
unsigned short usDspBaseIO = pSettings->usDspBaseIO;
- BOOLEAN bRC = 0;
+ bool bRC = 0;
PRINTK_6(TRACE_TP3780I,
"tp3780i::tp3780I_ReadWriteDspDStore entry pBDData %p, uOpcode %x, pvBuffer %p, uCount %x, ulDSPAddr %lx\n",
@@ -552,7 +552,7 @@
int retval = 0;
DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
unsigned short usDspBaseIO = pSettings->usDspBaseIO;
- BOOLEAN bRC = 0;
+ bool bRC = 0;
PRINTK_6(TRACE_TP3780I,
"tp3780i::tp3780I_ReadWriteDspIStore entry pBDData %p, uOpcode %x, pvBuffer %p, uCount %x, ulDSPAddr %lx\n",
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index f8a483c..d233688 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -286,7 +286,7 @@
struct parport *port;
struct pardevice *pdev = NULL;
char *name;
- int fl;
+ struct pardev_cb ppdev_cb;
name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
if (name == NULL)
@@ -299,9 +299,11 @@
return -ENXIO;
}
- fl = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
- pdev = parport_register_device(port, name, NULL,
- NULL, pp_irq, fl, pp);
+ memset(&ppdev_cb, 0, sizeof(ppdev_cb));
+ ppdev_cb.irq_func = pp_irq;
+ ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
+ ppdev_cb.private = pp;
+ pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
parport_put_port(port);
if (!pdev) {
@@ -799,10 +801,23 @@
device_destroy(ppdev_class, MKDEV(PP_MAJOR, port->number));
}
+static int pp_probe(struct pardevice *par_dev)
+{
+ struct device_driver *drv = par_dev->dev.driver;
+ int len = strlen(drv->name);
+
+ if (strncmp(par_dev->name, drv->name, len))
+ return -ENODEV;
+
+ return 0;
+}
+
static struct parport_driver pp_driver = {
.name = CHRDEV,
- .attach = pp_attach,
+ .probe = pp_probe,
+ .match_port = pp_attach,
.detach = pp_detach,
+ .devmodel = true,
};
static int __init ppdev_init(void)
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 94006f9..10e5632 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -385,13 +385,18 @@
event_nasid = ia64_sn_get_console_nasid();
+ snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME);
+ if (IS_ERR(snsc_class)) {
+ printk("%s: failed to allocate class\n", __func__);
+ return PTR_ERR(snsc_class);
+ }
+
if (alloc_chrdev_region(&first_dev, 0, num_cnodes,
SYSCTL_BASENAME) < 0) {
printk("%s: failed to register SN system controller device\n",
__func__);
return -ENODEV;
}
- snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME);
for (cnode = 0; cnode < num_cnodes; cnode++) {
geoid = cnodeid_get_geoid(cnode);
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
index 69f6b4a..398800e 100644
--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@@ -331,13 +331,11 @@
/**
* srom_setup_minor() - Initialize per-minor information.
* @srom: Per-device SROM state.
- * @index: Device to set up.
+ * @devhdl: Partition device handle.
*/
-static int srom_setup_minor(struct srom_dev *srom, int index)
+static int srom_setup_minor(struct srom_dev *srom, int devhdl)
{
- struct device *dev;
- int devhdl = srom->hv_devhdl;
-
+ srom->hv_devhdl = devhdl;
mutex_init(&srom->lock);
if (_srom_read(devhdl, &srom->total_size,
@@ -350,9 +348,7 @@
SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
return -EIO;
- dev = device_create(srom_class, &srom_parent->dev,
- MKDEV(srom_major, index), srom, "%d", index);
- return PTR_ERR_OR_ZERO(dev);
+ return 0;
}
/** srom_init() - Initialize the driver's module. */
@@ -365,7 +361,7 @@
* Start with a plausible number of partitions; the krealloc() call
* below will yield about log(srom_devs) additional allocations.
*/
- srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
+ srom_devices = kmalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
/* Discover the number of srom partitions. */
for (i = 0; ; i++) {
@@ -373,7 +369,7 @@
char buf[20];
struct srom_dev *new_srom_devices =
krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
- GFP_KERNEL | __GFP_ZERO);
+ GFP_KERNEL);
if (!new_srom_devices) {
result = -ENOMEM;
goto fail_mem;
@@ -387,7 +383,9 @@
i, devhdl);
break;
}
- srom_devices[i].hv_devhdl = devhdl;
+ result = srom_setup_minor(&srom_devices[i], devhdl);
+ if (result != 0)
+ goto fail_mem;
}
srom_devs = i;
@@ -431,9 +429,13 @@
srom_class->dev_groups = srom_dev_groups;
srom_class->devnode = srom_devnode;
- /* Do per-partition initialization */
+ /* Create per-partition devices */
for (i = 0; i < srom_devs; i++) {
- result = srom_setup_minor(srom_devices + i, i);
+ struct device *dev =
+ device_create(srom_class, &srom_parent->dev,
+ MKDEV(srom_major, i), srom_devices + i,
+ "%d", i);
+ result = PTR_ERR_OR_ZERO(dev);
if (result < 0)
goto fail_class;
}
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index b098d2d..67549ce 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -31,60 +31,53 @@
* printk messages (also suitable for logging service):
* - any cr is replaced by nl
* - adds a ttyprintk source tag in front of each line
- * - too long message is fragmeted, with '\'nl between fragments
- * - TPK_STR_SIZE isn't really the write_room limiting factor, bcause
+ * - too long message is fragmented, with '\'nl between fragments
+ * - TPK_STR_SIZE isn't really the write_room limiting factor, because
* it is emptied on the fly during preformatting.
*/
#define TPK_STR_SIZE 508 /* should be bigger then max expected line length */
#define TPK_MAX_ROOM 4096 /* we could assume 4K for instance */
-static const char *tpk_tag = "[U] "; /* U for User */
static int tpk_curr;
+static char tpk_buffer[TPK_STR_SIZE + 4];
+
+static void tpk_flush(void)
+{
+ if (tpk_curr > 0) {
+ tpk_buffer[tpk_curr] = '\0';
+ pr_info("[U] %s\n", tpk_buffer);
+ tpk_curr = 0;
+ }
+}
+
static int tpk_printk(const unsigned char *buf, int count)
{
- static char tmp[TPK_STR_SIZE + 4];
int i = tpk_curr;
if (buf == NULL) {
- /* flush tmp[] */
- if (tpk_curr > 0) {
- /* non nl or cr terminated message - add nl */
- tmp[tpk_curr + 0] = '\n';
- tmp[tpk_curr + 1] = '\0';
- printk(KERN_INFO "%s%s", tpk_tag, tmp);
- tpk_curr = 0;
- }
+ tpk_flush();
return i;
}
for (i = 0; i < count; i++) {
- tmp[tpk_curr] = buf[i];
- if (tpk_curr < TPK_STR_SIZE) {
- switch (buf[i]) {
- case '\r':
- /* replace cr with nl */
- tmp[tpk_curr + 0] = '\n';
- tmp[tpk_curr + 1] = '\0';
- printk(KERN_INFO "%s%s", tpk_tag, tmp);
- tpk_curr = 0;
- if ((i + 1) < count && buf[i + 1] == '\n')
- i++;
- break;
- case '\n':
- tmp[tpk_curr + 1] = '\0';
- printk(KERN_INFO "%s%s", tpk_tag, tmp);
- tpk_curr = 0;
- break;
- default:
- tpk_curr++;
- }
- } else {
+ if (tpk_curr >= TPK_STR_SIZE) {
/* end of tmp buffer reached: cut the message in two */
- tmp[tpk_curr + 1] = '\\';
- tmp[tpk_curr + 2] = '\n';
- tmp[tpk_curr + 3] = '\0';
- printk(KERN_INFO "%s%s", tpk_tag, tmp);
- tpk_curr = 0;
+ tpk_buffer[tpk_curr++] = '\\';
+ tpk_flush();
+ }
+
+ switch (buf[i]) {
+ case '\r':
+ tpk_flush();
+ if ((i + 1) < count && buf[i + 1] == '\n')
+ i++;
+ break;
+ case '\n':
+ tpk_flush();
+ break;
+ default:
+ tpk_buffer[tpk_curr++] = buf[i];
+ break;
}
}
diff --git a/drivers/char/xillybus/xillybus_core.c b/drivers/char/xillybus/xillybus_core.c
index dcd19f3..b6c9cde 100644
--- a/drivers/char/xillybus/xillybus_core.c
+++ b/drivers/char/xillybus/xillybus_core.c
@@ -655,10 +655,10 @@
version = channel->wr_buffers[0]->addr;
- /* Check version number. Accept anything below 0x82 for now. */
+ /* Check version number. Reject anything above 0x82. */
if (*version > 0x82) {
dev_err(endpoint->dev,
- "No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgarde. Aborting.\n",
+ "No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
*version);
return -ENODEV;
}
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 5677886..8a753fd 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -305,6 +305,16 @@
This must be disabled for hardware validation purposes to detect any
hardware anomalies of missing events.
+config FSL_ERRATUM_A008585
+ bool "Workaround for Freescale/NXP Erratum A-008585"
+ default y
+ depends on ARM_ARCH_TIMER && ARM64
+ help
+ This option enables a workaround for Freescale/NXP Erratum
+ A-008585 ("ARM generic timer may contain an erroneous
+ value"). The workaround will only be active if the
+ fsl,erratum-a008585 property is found in the timer node.
+
config ARM_GLOBAL_TIMER
bool "Support for the ARM global timer" if COMPILE_TEST
select CLKSRC_OF if OF
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5770054..73c487d 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -94,6 +94,43 @@
* Architected system timer support.
*/
+#ifdef CONFIG_FSL_ERRATUM_A008585
+DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
+
+static int fsl_a008585_enable = -1;
+
+static int __init early_fsl_a008585_cfg(char *buf)
+{
+ int ret;
+ bool val;
+
+ ret = strtobool(buf, &val);
+ if (ret)
+ return ret;
+
+ fsl_a008585_enable = val;
+ return 0;
+}
+early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg);
+
+u32 __fsl_a008585_read_cntp_tval_el0(void)
+{
+ return __fsl_a008585_read_reg(cntp_tval_el0);
+}
+
+u32 __fsl_a008585_read_cntv_tval_el0(void)
+{
+ return __fsl_a008585_read_reg(cntv_tval_el0);
+}
+
+u64 __fsl_a008585_read_cntvct_el0(void)
+{
+ return __fsl_a008585_read_reg(cntvct_el0);
+}
+EXPORT_SYMBOL(__fsl_a008585_read_cntvct_el0);
+#endif /* CONFIG_FSL_ERRATUM_A008585 */
+
static __always_inline
void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
struct clock_event_device *clk)
@@ -243,6 +280,40 @@
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
}
+#ifdef CONFIG_FSL_ERRATUM_A008585
+static __always_inline void fsl_a008585_set_next_event(const int access,
+ unsigned long evt, struct clock_event_device *clk)
+{
+ unsigned long ctrl;
+ u64 cval = evt + arch_counter_get_cntvct();
+
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+ ctrl |= ARCH_TIMER_CTRL_ENABLE;
+ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+
+ if (access == ARCH_TIMER_PHYS_ACCESS)
+ write_sysreg(cval, cntp_cval_el0);
+ else if (access == ARCH_TIMER_VIRT_ACCESS)
+ write_sysreg(cval, cntv_cval_el0);
+
+ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+}
+
+static int fsl_a008585_set_next_event_virt(unsigned long evt,
+ struct clock_event_device *clk)
+{
+ fsl_a008585_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
+ return 0;
+}
+
+static int fsl_a008585_set_next_event_phys(unsigned long evt,
+ struct clock_event_device *clk)
+{
+ fsl_a008585_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
+ return 0;
+}
+#endif /* CONFIG_FSL_ERRATUM_A008585 */
+
static int arch_timer_set_next_event_virt(unsigned long evt,
struct clock_event_device *clk)
{
@@ -271,6 +342,19 @@
return 0;
}
+static void fsl_a008585_set_sne(struct clock_event_device *clk)
+{
+#ifdef CONFIG_FSL_ERRATUM_A008585
+ if (!static_branch_unlikely(&arch_timer_read_ool_enabled))
+ return;
+
+ if (arch_timer_uses_ppi == VIRT_PPI)
+ clk->set_next_event = fsl_a008585_set_next_event_virt;
+ else
+ clk->set_next_event = fsl_a008585_set_next_event_phys;
+#endif
+}
+
static void __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
@@ -299,6 +383,8 @@
default:
BUG();
}
+
+ fsl_a008585_set_sne(clk);
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
@@ -515,15 +601,19 @@
arch_timer_read_counter = arch_counter_get_cntvct;
else
arch_timer_read_counter = arch_counter_get_cntpct;
+
+ clocksource_counter.archdata.vdso_direct = true;
+
+#ifdef CONFIG_FSL_ERRATUM_A008585
+ /*
+ * Don't use the vdso fastpath if errata require using
+ * the out-of-line counter accessor.
+ */
+ if (static_branch_unlikely(&arch_timer_read_ool_enabled))
+ clocksource_counter.archdata.vdso_direct = false;
+#endif
} else {
arch_timer_read_counter = arch_counter_get_cntvct_mem;
-
- /* If the clocksource name is "arch_sys_counter" the
- * VDSO will attempt to read the CP15-based counter.
- * Ensure this does not happen when CP15-based
- * counter is not available.
- */
- clocksource_counter.name = "arch_mem_counter";
}
start_count = arch_timer_read_counter();
@@ -800,6 +890,15 @@
arch_timer_c3stop = !of_property_read_bool(np, "always-on");
+#ifdef CONFIG_FSL_ERRATUM_A008585
+ if (fsl_a008585_enable < 0)
+ fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585");
+ if (fsl_a008585_enable) {
+ static_branch_enable(&arch_timer_read_ool_enabled);
+ pr_info("Enabling workaround for FSL erratum A-008585\n");
+ }
+#endif
+
/*
* If we cannot rely on firmware initializing the timer registers then
* we should use the physical timers instead.
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index b4b3ab5..7a960cd 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -109,12 +109,15 @@
{
int ret;
- if (!cpu_has_counter || !gic_frequency)
+ if (!gic_frequency)
return -ENXIO;
ret = setup_percpu_irq(gic_timer_irq, &gic_compare_irqaction);
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("GIC timer IRQ %d setup failed: %d\n",
+ gic_timer_irq, ret);
return ret;
+ }
cpuhp_setup_state(CPUHP_AP_MIPS_GIC_TIMER_STARTING,
"AP_MIPS_GIC_TIMER_STARTING", gic_starting_cpu,
diff --git a/drivers/clocksource/moxart_timer.c b/drivers/clocksource/moxart_timer.c
index 8414544..2a8f470 100644
--- a/drivers/clocksource/moxart_timer.c
+++ b/drivers/clocksource/moxart_timer.c
@@ -21,6 +21,7 @@
#include <linux/io.h>
#include <linux/clocksource.h>
#include <linux/bitops.h>
+#include <linux/slab.h>
#define TIMER1_BASE 0x00
#define TIMER2_BASE 0x10
@@ -36,75 +37,109 @@
#define TIMER_INTR_MASK 0x38
/*
- * TIMER_CR flags:
+ * Moxart TIMER_CR flags:
*
- * TIMEREG_CR_*_CLOCK 0: PCLK, 1: EXT1CLK
- * TIMEREG_CR_*_INT overflow interrupt enable bit
+ * MOXART_CR_*_CLOCK 0: PCLK, 1: EXT1CLK
+ * MOXART_CR_*_INT overflow interrupt enable bit
*/
-#define TIMEREG_CR_1_ENABLE BIT(0)
-#define TIMEREG_CR_1_CLOCK BIT(1)
-#define TIMEREG_CR_1_INT BIT(2)
-#define TIMEREG_CR_2_ENABLE BIT(3)
-#define TIMEREG_CR_2_CLOCK BIT(4)
-#define TIMEREG_CR_2_INT BIT(5)
-#define TIMEREG_CR_3_ENABLE BIT(6)
-#define TIMEREG_CR_3_CLOCK BIT(7)
-#define TIMEREG_CR_3_INT BIT(8)
-#define TIMEREG_CR_COUNT_UP BIT(9)
+#define MOXART_CR_1_ENABLE BIT(0)
+#define MOXART_CR_1_CLOCK BIT(1)
+#define MOXART_CR_1_INT BIT(2)
+#define MOXART_CR_2_ENABLE BIT(3)
+#define MOXART_CR_2_CLOCK BIT(4)
+#define MOXART_CR_2_INT BIT(5)
+#define MOXART_CR_3_ENABLE BIT(6)
+#define MOXART_CR_3_CLOCK BIT(7)
+#define MOXART_CR_3_INT BIT(8)
+#define MOXART_CR_COUNT_UP BIT(9)
-#define TIMER1_ENABLE (TIMEREG_CR_2_ENABLE | TIMEREG_CR_1_ENABLE)
-#define TIMER1_DISABLE (TIMEREG_CR_2_ENABLE)
+#define MOXART_TIMER1_ENABLE (MOXART_CR_2_ENABLE | MOXART_CR_1_ENABLE)
+#define MOXART_TIMER1_DISABLE (MOXART_CR_2_ENABLE)
-static void __iomem *base;
-static unsigned int clock_count_per_tick;
+/*
+ * The ASpeed variant of the IP block has a different layout
+ * for the control register
+ */
+#define ASPEED_CR_1_ENABLE BIT(0)
+#define ASPEED_CR_1_CLOCK BIT(1)
+#define ASPEED_CR_1_INT BIT(2)
+#define ASPEED_CR_2_ENABLE BIT(4)
+#define ASPEED_CR_2_CLOCK BIT(5)
+#define ASPEED_CR_2_INT BIT(6)
+#define ASPEED_CR_3_ENABLE BIT(8)
+#define ASPEED_CR_3_CLOCK BIT(9)
+#define ASPEED_CR_3_INT BIT(10)
+
+#define ASPEED_TIMER1_ENABLE (ASPEED_CR_2_ENABLE | ASPEED_CR_1_ENABLE)
+#define ASPEED_TIMER1_DISABLE (ASPEED_CR_2_ENABLE)
+
+struct moxart_timer {
+ void __iomem *base;
+ unsigned int t1_disable_val;
+ unsigned int t1_enable_val;
+ unsigned int count_per_tick;
+ struct clock_event_device clkevt;
+};
+
+static inline struct moxart_timer *to_moxart(struct clock_event_device *evt)
+{
+ return container_of(evt, struct moxart_timer, clkevt);
+}
+
+static inline void moxart_disable(struct clock_event_device *evt)
+{
+ struct moxart_timer *timer = to_moxart(evt);
+
+ writel(timer->t1_disable_val, timer->base + TIMER_CR);
+}
+
+static inline void moxart_enable(struct clock_event_device *evt)
+{
+ struct moxart_timer *timer = to_moxart(evt);
+
+ writel(timer->t1_enable_val, timer->base + TIMER_CR);
+}
static int moxart_shutdown(struct clock_event_device *evt)
{
- writel(TIMER1_DISABLE, base + TIMER_CR);
+ moxart_disable(evt);
return 0;
}
static int moxart_set_oneshot(struct clock_event_device *evt)
{
- writel(TIMER1_DISABLE, base + TIMER_CR);
- writel(~0, base + TIMER1_BASE + REG_LOAD);
+ moxart_disable(evt);
+ writel(~0, to_moxart(evt)->base + TIMER1_BASE + REG_LOAD);
return 0;
}
static int moxart_set_periodic(struct clock_event_device *evt)
{
- writel(clock_count_per_tick, base + TIMER1_BASE + REG_LOAD);
- writel(TIMER1_ENABLE, base + TIMER_CR);
+ struct moxart_timer *timer = to_moxart(evt);
+
+ moxart_disable(evt);
+ writel(timer->count_per_tick, timer->base + TIMER1_BASE + REG_LOAD);
+ writel(0, timer->base + TIMER1_BASE + REG_MATCH1);
+ moxart_enable(evt);
return 0;
}
static int moxart_clkevt_next_event(unsigned long cycles,
- struct clock_event_device *unused)
+ struct clock_event_device *evt)
{
+ struct moxart_timer *timer = to_moxart(evt);
u32 u;
- writel(TIMER1_DISABLE, base + TIMER_CR);
+ moxart_disable(evt);
- u = readl(base + TIMER1_BASE + REG_COUNT) - cycles;
- writel(u, base + TIMER1_BASE + REG_MATCH1);
+ u = readl(timer->base + TIMER1_BASE + REG_COUNT) - cycles;
+ writel(u, timer->base + TIMER1_BASE + REG_MATCH1);
- writel(TIMER1_ENABLE, base + TIMER_CR);
+ moxart_enable(evt);
return 0;
}
-static struct clock_event_device moxart_clockevent = {
- .name = "moxart_timer",
- .rating = 200,
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .set_state_shutdown = moxart_shutdown,
- .set_state_periodic = moxart_set_periodic,
- .set_state_oneshot = moxart_set_oneshot,
- .tick_resume = moxart_set_oneshot,
- .set_next_event = moxart_clkevt_next_event,
-};
-
static irqreturn_t moxart_timer_interrupt(int irq, void *dev_id)
{
struct clock_event_device *evt = dev_id;
@@ -112,21 +147,19 @@
return IRQ_HANDLED;
}
-static struct irqaction moxart_timer_irq = {
- .name = "moxart-timer",
- .flags = IRQF_TIMER,
- .handler = moxart_timer_interrupt,
- .dev_id = &moxart_clockevent,
-};
-
static int __init moxart_timer_init(struct device_node *node)
{
int ret, irq;
unsigned long pclk;
struct clk *clk;
+ struct moxart_timer *timer;
- base = of_iomap(node, 0);
- if (!base) {
+ timer = kzalloc(sizeof(*timer), GFP_KERNEL);
+ if (!timer)
+ return -ENOMEM;
+
+ timer->base = of_iomap(node, 0);
+ if (!timer->base) {
pr_err("%s: of_iomap failed\n", node->full_name);
return -ENXIO;
}
@@ -137,12 +170,6 @@
return -EINVAL;
}
- ret = setup_irq(irq, &moxart_timer_irq);
- if (ret) {
- pr_err("%s: setup_irq failed\n", node->full_name);
- return ret;
- }
-
clk = of_clk_get(node, 0);
if (IS_ERR(clk)) {
pr_err("%s: of_clk_get failed\n", node->full_name);
@@ -151,7 +178,32 @@
pclk = clk_get_rate(clk);
- ret = clocksource_mmio_init(base + TIMER2_BASE + REG_COUNT,
+ if (of_device_is_compatible(node, "moxa,moxart-timer")) {
+ timer->t1_enable_val = MOXART_TIMER1_ENABLE;
+ timer->t1_disable_val = MOXART_TIMER1_DISABLE;
+ } else if (of_device_is_compatible(node, "aspeed,ast2400-timer")) {
+ timer->t1_enable_val = ASPEED_TIMER1_ENABLE;
+ timer->t1_disable_val = ASPEED_TIMER1_DISABLE;
+ } else {
+ pr_err("%s: unknown platform\n", node->full_name);
+ return -EINVAL;
+ }
+
+ timer->count_per_tick = DIV_ROUND_CLOSEST(pclk, HZ);
+
+ timer->clkevt.name = node->name;
+ timer->clkevt.rating = 200;
+ timer->clkevt.features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT;
+ timer->clkevt.set_state_shutdown = moxart_shutdown;
+ timer->clkevt.set_state_periodic = moxart_set_periodic;
+ timer->clkevt.set_state_oneshot = moxart_set_oneshot;
+ timer->clkevt.tick_resume = moxart_set_oneshot;
+ timer->clkevt.set_next_event = moxart_clkevt_next_event;
+ timer->clkevt.cpumask = cpumask_of(0);
+ timer->clkevt.irq = irq;
+
+ ret = clocksource_mmio_init(timer->base + TIMER2_BASE + REG_COUNT,
"moxart_timer", pclk, 200, 32,
clocksource_mmio_readl_down);
if (ret) {
@@ -159,13 +211,26 @@
return ret;
}
- clock_count_per_tick = DIV_ROUND_CLOSEST(pclk, HZ);
+ ret = request_irq(irq, moxart_timer_interrupt, IRQF_TIMER,
+ node->name, &timer->clkevt);
+ if (ret) {
+ pr_err("%s: setup_irq failed\n", node->full_name);
+ return ret;
+ }
- writel(~0, base + TIMER2_BASE + REG_LOAD);
- writel(TIMEREG_CR_2_ENABLE, base + TIMER_CR);
+ /* Clear match registers */
+ writel(0, timer->base + TIMER1_BASE + REG_MATCH1);
+ writel(0, timer->base + TIMER1_BASE + REG_MATCH2);
+ writel(0, timer->base + TIMER2_BASE + REG_MATCH1);
+ writel(0, timer->base + TIMER2_BASE + REG_MATCH2);
- moxart_clockevent.cpumask = cpumask_of(0);
- moxart_clockevent.irq = irq;
+ /*
+ * Start timer 2 rolling as our main wall clock source, keep timer 1
+ * disabled
+ */
+ writel(0, timer->base + TIMER_CR);
+ writel(~0, timer->base + TIMER2_BASE + REG_LOAD);
+ writel(timer->t1_disable_val, timer->base + TIMER_CR);
/*
* documentation is not publicly available:
@@ -173,9 +238,9 @@
* max_delta 0xfffffffe should be ok because count
* register size is u32
*/
- clockevents_config_and_register(&moxart_clockevent, pclk,
- 0x4, 0xfffffffe);
+ clockevents_config_and_register(&timer->clkevt, pclk, 0x4, 0xfffffffe);
return 0;
}
CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", moxart_timer_init);
+CLOCKSOURCE_OF_DECLARE(aspeed, "aspeed,ast2400-timer", moxart_timer_init);
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index 7f0f5b2..6555821 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -149,24 +149,13 @@
{
struct pit_data *data = dev_id;
- /*
- * irqs should be disabled here, but as the irq is shared they are only
- * guaranteed to be off if the timer irq is registered first.
- */
- WARN_ON_ONCE(!irqs_disabled());
-
/* The PIT interrupt may be disabled, and is shared */
if (clockevent_state_periodic(&data->clkevt) &&
(pit_read(data->base, AT91_PIT_SR) & AT91_PIT_PITS)) {
- unsigned nr_ticks;
-
/* Get number of ticks performed before irq, and ack it */
- nr_ticks = PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
- do {
- data->cnt += data->cycle;
- data->clkevt.event_handler(&data->clkevt);
- nr_ticks--;
- } while (nr_ticks);
+ data->cnt += data->cycle * PIT_PICNT(pit_read(data->base,
+ AT91_PIT_PIVR));
+ data->clkevt.event_handler(&data->clkevt);
return IRQ_HANDLED;
}
@@ -177,11 +166,41 @@
/*
* Set up both clocksource and clockevent support.
*/
-static int __init at91sam926x_pit_common_init(struct pit_data *data)
+static int __init at91sam926x_pit_dt_init(struct device_node *node)
{
- unsigned long pit_rate;
- unsigned bits;
- int ret;
+ unsigned long pit_rate;
+ unsigned bits;
+ int ret;
+ struct pit_data *data;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->base = of_iomap(node, 0);
+ if (!data->base) {
+ pr_err("Could not map PIT address\n");
+ return -ENXIO;
+ }
+
+ data->mck = of_clk_get(node, 0);
+ if (IS_ERR(data->mck)) {
+ pr_err("Unable to get mck clk\n");
+ return PTR_ERR(data->mck);
+ }
+
+ ret = clk_prepare_enable(data->mck);
+ if (ret) {
+ pr_err("Unable to enable mck\n");
+ return ret;
+ }
+
+ /* Get the interrupts property */
+ data->irq = irq_of_parse_and_map(node, 0);
+ if (!data->irq) {
+ pr_err("Unable to get IRQ from DT\n");
+ return -EINVAL;
+ }
/*
* Use our actual MCK to figure out how many MCK/16 ticks per
@@ -236,46 +255,5 @@
return 0;
}
-
-static int __init at91sam926x_pit_dt_init(struct device_node *node)
-{
- struct pit_data *data;
- int ret;
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->base = of_iomap(node, 0);
- if (!data->base) {
- pr_err("Could not map PIT address\n");
- return -ENXIO;
- }
-
- data->mck = of_clk_get(node, 0);
- if (IS_ERR(data->mck))
- /* Fallback on clkdev for !CCF-based boards */
- data->mck = clk_get(NULL, "mck");
-
- if (IS_ERR(data->mck)) {
- pr_err("Unable to get mck clk\n");
- return PTR_ERR(data->mck);
- }
-
- ret = clk_prepare_enable(data->mck);
- if (ret) {
- pr_err("Unable to enable mck\n");
- return ret;
- }
-
- /* Get the interrupts property */
- data->irq = irq_of_parse_and_map(node, 0);
- if (!data->irq) {
- pr_err("Unable to get IRQ from DT\n");
- return -EINVAL;
- }
-
- return at91sam926x_pit_common_init(data);
-}
CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit",
at91sam926x_pit_dt_init);
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c
index bd887e2..d630bf4 100644
--- a/drivers/clocksource/timer-oxnas-rps.c
+++ b/drivers/clocksource/timer-oxnas-rps.c
@@ -295,3 +295,5 @@
CLOCKSOURCE_OF_DECLARE(ox810se_rps,
"oxsemi,ox810se-rps-timer", oxnas_rps_timer_init);
+CLOCKSOURCE_OF_DECLARE(ox820_rps,
+ "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index 92b7e39..cf5b14e 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -65,7 +65,7 @@
return container_of(cs, struct ti_32k, cs);
}
-static cycle_t ti_32k_read_cycles(struct clocksource *cs)
+static cycle_t notrace ti_32k_read_cycles(struct clocksource *cs)
{
struct ti_32k *ti = to_ti_32k(cs);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 74919aa..d8b164a 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -194,7 +194,7 @@
If in doubt, say N.
config CPU_FREQ_GOV_SCHEDUTIL
- tristate "'schedutil' cpufreq policy governor"
+ bool "'schedutil' cpufreq policy governor"
depends on CPU_FREQ && SMP
select CPU_FREQ_GOV_ATTR_SET
select IRQ_WORK
@@ -208,9 +208,6 @@
frequency tipping point is at utilization/capacity equal to 80% in
both cases.
- To compile this driver as a module, choose M here: the module will
- be called cpufreq_schedutil.
-
If in doubt, say N.
comment "CPU frequency scaling drivers"
@@ -225,7 +222,7 @@
help
This adds a generic DT based cpufreq driver for frequency management.
It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
- systems which share clock and voltage across all CPUs.
+ systems.
If in doubt, say N.
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8882b8e..1b2f28f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -19,10 +19,19 @@
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/dmi.h>
#include <linux/vmalloc.h>
+#include <asm/unaligned.h>
+
#include <acpi/cppc_acpi.h>
+/* Minimum struct length needed for the DMI processor entry we want */
+#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48
+
+/* Offest in the DMI processor structure for the max frequency */
+#define DMI_PROCESSOR_MAX_SPEED 0x14
+
/*
* These structs contain information parsed from per CPU
* ACPI _CPC structures.
@@ -30,19 +39,52 @@
* performance capabilities, desired performance level
* requested etc.
*/
-static struct cpudata **all_cpu_data;
+static struct cppc_cpudata **all_cpu_data;
+
+/* Capture the max KHz from DMI */
+static u64 cppc_dmi_max_khz;
+
+/* Callback function used to retrieve the max frequency from DMI */
+static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
+{
+ const u8 *dmi_data = (const u8 *)dm;
+ u16 *mhz = (u16 *)private;
+
+ if (dm->type == DMI_ENTRY_PROCESSOR &&
+ dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
+ u16 val = (u16)get_unaligned((const u16 *)
+ (dmi_data + DMI_PROCESSOR_MAX_SPEED));
+ *mhz = val > *mhz ? val : *mhz;
+ }
+}
+
+/* Look up the max frequency in DMI */
+static u64 cppc_get_dmi_max_khz(void)
+{
+ u16 mhz = 0;
+
+ dmi_walk(cppc_find_dmi_mhz, &mhz);
+
+ /*
+ * Real stupid fallback value, just in case there is no
+ * actual value set.
+ */
+ mhz = mhz ? mhz : 1;
+
+ return (1000 * mhz);
+}
static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
- struct cpudata *cpu;
+ struct cppc_cpudata *cpu;
struct cpufreq_freqs freqs;
int ret = 0;
cpu = all_cpu_data[policy->cpu];
- cpu->perf_ctrls.desired_perf = target_freq;
+ cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz;
freqs.old = policy->cur;
freqs.new = target_freq;
@@ -66,7 +108,7 @@
static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
int cpu_num = policy->cpu;
- struct cpudata *cpu = all_cpu_data[cpu_num];
+ struct cppc_cpudata *cpu = all_cpu_data[cpu_num];
int ret;
cpu->perf_ctrls.desired_perf = cpu->perf_caps.lowest_perf;
@@ -79,7 +121,7 @@
static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
- struct cpudata *cpu;
+ struct cppc_cpudata *cpu;
unsigned int cpu_num = policy->cpu;
int ret = 0;
@@ -94,10 +136,13 @@
return ret;
}
- policy->min = cpu->perf_caps.lowest_perf;
- policy->max = cpu->perf_caps.highest_perf;
+ cppc_dmi_max_khz = cppc_get_dmi_max_khz();
+
+ policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf;
+ policy->max = cppc_dmi_max_khz;
policy->cpuinfo.min_freq = policy->min;
policy->cpuinfo.max_freq = policy->max;
+ policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
policy->shared_type = cpu->shared_type;
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
@@ -112,7 +157,8 @@
cpu->cur_policy = policy;
/* Set policy->cur to max now. The governors will adjust later. */
- policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
+ policy->cur = cppc_dmi_max_khz;
+ cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
if (ret)
@@ -134,7 +180,7 @@
static int __init cppc_cpufreq_init(void)
{
int i, ret = 0;
- struct cpudata *cpu;
+ struct cppc_cpudata *cpu;
if (acpi_disabled)
return -ENODEV;
@@ -144,7 +190,7 @@
return -ENOMEM;
for_each_possible_cpu(i) {
- all_cpu_data[i] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
+ all_cpu_data[i] = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
if (!all_cpu_data[i])
goto out;
@@ -175,7 +221,7 @@
static void __exit cppc_cpufreq_exit(void)
{
- struct cpudata *cpu;
+ struct cppc_cpudata *cpu;
int i;
cpufreq_unregister_driver(&cppc_cpufreq_driver);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 2ee40fd..7126762 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -11,6 +11,8 @@
#include <linux/of.h>
#include <linux/platform_device.h>
+#include "cpufreq-dt.h"
+
static const struct of_device_id machines[] __initconst = {
{ .compatible = "allwinner,sun4i-a10", },
{ .compatible = "allwinner,sun5i-a10s", },
@@ -40,6 +42,7 @@
{ .compatible = "samsung,exynos5250", },
#ifndef CONFIG_BL_SWITCHER
{ .compatible = "samsung,exynos5420", },
+ { .compatible = "samsung,exynos5433", },
{ .compatible = "samsung,exynos5800", },
#endif
@@ -51,6 +54,7 @@
{ .compatible = "renesas,r8a7779", },
{ .compatible = "renesas,r8a7790", },
{ .compatible = "renesas,r8a7791", },
+ { .compatible = "renesas,r8a7792", },
{ .compatible = "renesas,r8a7793", },
{ .compatible = "renesas,r8a7794", },
{ .compatible = "renesas,sh73a0", },
@@ -68,6 +72,8 @@
{ .compatible = "sigma,tango4" },
+ { .compatible = "ti,am33xx", },
+ { .compatible = "ti,dra7", },
{ .compatible = "ti,omap2", },
{ .compatible = "ti,omap3", },
{ .compatible = "ti,omap4", },
@@ -91,7 +97,8 @@
if (!match)
return -ENODEV;
- return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
- NULL, 0));
+ return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt",
+ -1, match->data,
+ sizeof(struct cpufreq_dt_platform_data)));
}
device_initcall(cpufreq_dt_platdev_init);
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 3957de8..5c07ae0 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -25,6 +25,8 @@
#include <linux/slab.h>
#include <linux/thermal.h>
+#include "cpufreq-dt.h"
+
struct private_data {
struct device *cpu_dev;
struct thermal_cooling_device *cdev;
@@ -353,6 +355,7 @@
static int dt_cpufreq_probe(struct platform_device *pdev)
{
+ struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
int ret;
/*
@@ -366,7 +369,8 @@
if (ret)
return ret;
- dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
+ if (data && data->have_governor_per_policy)
+ dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
ret = cpufreq_register_driver(&dt_cpufreq_driver);
if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h
new file mode 100644
index 0000000..54d774e
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+#include <linux/types.h>
+
+struct cpufreq_dt_platform_data {
+ bool have_governor_per_policy;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3dd4884..6e6c1fb 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -916,58 +916,18 @@
.release = cpufreq_sysfs_release,
};
-static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
+static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
+ struct device *dev)
{
- struct device *cpu_dev;
-
- pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu);
-
- if (!policy)
- return 0;
-
- cpu_dev = get_cpu_device(cpu);
- if (WARN_ON(!cpu_dev))
- return 0;
-
- return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq");
+ dev_dbg(dev, "%s: Adding symlink\n", __func__);
+ return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
}
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+ struct device *dev)
{
- struct device *cpu_dev;
-
- pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu);
-
- cpu_dev = get_cpu_device(cpu);
- if (WARN_ON(!cpu_dev))
- return;
-
- sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
-}
-
-/* Add/remove symlinks for all related CPUs */
-static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
-{
- unsigned int j;
- int ret = 0;
-
- /* Some related CPUs might not be present (physically hotplugged) */
- for_each_cpu(j, policy->real_cpus) {
- ret = add_cpu_dev_symlink(policy, j);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
-{
- unsigned int j;
-
- /* Some related CPUs might not be present (physically hotplugged) */
- for_each_cpu(j, policy->real_cpus)
- remove_cpu_dev_symlink(policy, j);
+ dev_dbg(dev, "%s: Removing symlink\n", __func__);
+ sysfs_remove_link(&dev->kobj, "cpufreq");
}
static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -999,7 +959,7 @@
return ret;
}
- return cpufreq_add_dev_symlink(policy);
+ return 0;
}
__weak struct cpufreq_governor *cpufreq_default_governor(void)
@@ -1073,13 +1033,9 @@
static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
{
- struct device *dev = get_cpu_device(cpu);
struct cpufreq_policy *policy;
int ret;
- if (WARN_ON(!dev))
- return NULL;
-
policy = kzalloc(sizeof(*policy), GFP_KERNEL);
if (!policy)
return NULL;
@@ -1133,7 +1089,6 @@
down_write(&policy->rwsem);
cpufreq_stats_free_table(policy);
- cpufreq_remove_dev_symlink(policy);
kobj = &policy->kobj;
cmp = &policy->kobj_unregister;
up_write(&policy->rwsem);
@@ -1215,8 +1170,8 @@
if (new_policy) {
/* related_cpus should at least include policy->cpus. */
cpumask_copy(policy->related_cpus, policy->cpus);
- /* Remember CPUs present at the policy creation time. */
- cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
+ /* Clear mask of registered CPUs */
+ cpumask_clear(policy->real_cpus);
}
/*
@@ -1331,6 +1286,8 @@
return ret;
}
+static int cpufreq_offline(unsigned int cpu);
+
/**
* cpufreq_add_dev - the cpufreq interface for a CPU device.
* @dev: CPU device.
@@ -1340,25 +1297,31 @@
{
struct cpufreq_policy *policy;
unsigned cpu = dev->id;
+ int ret;
dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
- if (cpu_online(cpu))
- return cpufreq_online(cpu);
+ if (cpu_online(cpu)) {
+ ret = cpufreq_online(cpu);
+ if (ret)
+ return ret;
+ }
- /*
- * A hotplug notifier will follow and we will handle it as CPU online
- * then. For now, just create the sysfs link, unless there is no policy
- * or the link is already present.
- */
+ /* Create sysfs link on CPU registration */
policy = per_cpu(cpufreq_cpu_data, cpu);
if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
return 0;
- return add_cpu_dev_symlink(policy, cpu);
+ ret = add_cpu_dev_symlink(policy, dev);
+ if (ret) {
+ cpumask_clear_cpu(cpu, policy->real_cpus);
+ cpufreq_offline(cpu);
+ }
+
+ return ret;
}
-static void cpufreq_offline(unsigned int cpu)
+static int cpufreq_offline(unsigned int cpu)
{
struct cpufreq_policy *policy;
int ret;
@@ -1368,7 +1331,7 @@
policy = cpufreq_cpu_get_raw(cpu);
if (!policy) {
pr_debug("%s: No cpu_data found\n", __func__);
- return;
+ return 0;
}
down_write(&policy->rwsem);
@@ -1417,6 +1380,7 @@
unlock:
up_write(&policy->rwsem);
+ return 0;
}
/**
@@ -1436,7 +1400,7 @@
cpufreq_offline(cpu);
cpumask_clear_cpu(cpu, policy->real_cpus);
- remove_cpu_dev_symlink(policy, cpu);
+ remove_cpu_dev_symlink(policy, dev);
if (cpumask_empty(policy->real_cpus))
cpufreq_policy_free(policy, true);
@@ -2332,28 +2296,6 @@
}
EXPORT_SYMBOL(cpufreq_update_policy);
-static int cpufreq_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- cpufreq_online(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- cpufreq_offline(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block __refdata cpufreq_cpu_notifier = {
- .notifier_call = cpufreq_cpu_callback,
-};
-
/*********************************************************************
* BOOST *
*********************************************************************/
@@ -2455,6 +2397,7 @@
/*********************************************************************
* REGISTER / UNREGISTER CPUFREQ DRIVER *
*********************************************************************/
+static enum cpuhp_state hp_online;
/**
* cpufreq_register_driver - register a CPU Frequency driver
@@ -2517,7 +2460,14 @@
goto err_if_unreg;
}
- register_hotcpu_notifier(&cpufreq_cpu_notifier);
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
+ cpufreq_online,
+ cpufreq_offline);
+ if (ret < 0)
+ goto err_if_unreg;
+ hp_online = ret;
+ ret = 0;
+
pr_debug("driver %s up and running\n", driver_data->name);
goto out;
@@ -2556,7 +2506,7 @@
get_online_cpus();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
- unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
+ cpuhp_remove_state_nocalls(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e415349..642dd0f 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -260,7 +260,7 @@
}
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index be9eade..806f203 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -181,6 +181,8 @@
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utility callback information
* @update_util_set: CPUFreq utility callback is set
+ * @iowait_boost: iowait-related boost fraction
+ * @last_update: Time of the last update.
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
@@ -206,6 +208,7 @@
struct vid_data vid;
struct _pid pid;
+ u64 last_update;
u64 last_sample_time;
u64 prev_aperf;
u64 prev_mperf;
@@ -216,6 +219,7 @@
struct acpi_processor_performance acpi_perf_data;
bool valid_pss_table;
#endif
+ unsigned int iowait_boost;
};
static struct cpudata **all_cpu_data;
@@ -229,6 +233,7 @@
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
+ * @boost_iowait: Whether or not to use iowait boosting.
*
* Stores per CPU model static PID configuration data.
*/
@@ -240,6 +245,7 @@
int p_gain_pct;
int d_gain_pct;
int i_gain_pct;
+ bool boost_iowait;
};
/**
@@ -1029,7 +1035,7 @@
},
};
-static struct cpu_defaults silvermont_params = {
+static const struct cpu_defaults silvermont_params = {
.pid_policy = {
.sample_rate_ms = 10,
.deadband = 0,
@@ -1037,6 +1043,7 @@
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1050,7 +1057,7 @@
},
};
-static struct cpu_defaults airmont_params = {
+static const struct cpu_defaults airmont_params = {
.pid_policy = {
.sample_rate_ms = 10,
.deadband = 0,
@@ -1058,6 +1065,7 @@
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1071,7 +1079,7 @@
},
};
-static struct cpu_defaults knl_params = {
+static const struct cpu_defaults knl_params = {
.pid_policy = {
.sample_rate_ms = 10,
.deadband = 0,
@@ -1091,7 +1099,7 @@
},
};
-static struct cpu_defaults bxt_params = {
+static const struct cpu_defaults bxt_params = {
.pid_policy = {
.sample_rate_ms = 10,
.deadband = 0,
@@ -1099,6 +1107,7 @@
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = core_get_max_pstate,
@@ -1222,36 +1231,18 @@
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
- u64 cummulative_iowait, delta_iowait_us;
- u64 delta_iowait_mperf;
- u64 mperf, now;
- int32_t cpu_load;
+ int32_t busy_frac, boost;
- cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
+ busy_frac = div_fp(sample->mperf, sample->tsc);
- /*
- * Convert iowait time into number of IO cycles spent at max_freq.
- * IO is considered as busy only for the cpu_load algorithm. For
- * performance this is not needed since we always try to reach the
- * maximum P-State, so we are already boosting the IOs.
- */
- delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
- delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
- cpu->pstate.max_pstate, MSEC_PER_SEC);
+ boost = cpu->iowait_boost;
+ cpu->iowait_boost >>= 1;
- mperf = cpu->sample.mperf + delta_iowait_mperf;
- cpu->prev_cummulative_iowait = cummulative_iowait;
+ if (busy_frac < boost)
+ busy_frac = boost;
- /*
- * The load can be estimated as the ratio of the mperf counter
- * running at a constant frequency during active periods
- * (C0) and the time stamp counter running at the same frequency
- * also during C-states.
- */
- cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
- cpu->sample.busy_scaled = cpu_load;
-
- return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
+ sample->busy_scaled = busy_frac * 100;
+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1325,15 +1316,29 @@
sample->mperf,
sample->aperf,
sample->tsc,
- get_avg_frequency(cpu));
+ get_avg_frequency(cpu),
+ fp_toint(cpu->iowait_boost * 100));
}
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
- u64 delta_ns = time - cpu->sample.time;
+ u64 delta_ns;
+ if (pid_params.boost_iowait) {
+ if (flags & SCHED_CPUFREQ_IOWAIT) {
+ cpu->iowait_boost = int_tofp(1);
+ } else if (cpu->iowait_boost) {
+ /* Clear iowait_boost if the CPU may have been idle. */
+ delta_ns = time - cpu->last_update;
+ if (delta_ns > TICK_NSEC)
+ cpu->iowait_boost = 0;
+ }
+ cpu->last_update = time;
+ }
+
+ delta_ns = time - cpu->sample.time;
if ((s64)delta_ns >= pid_params.sample_rate_ns) {
bool sample_taken = intel_pstate_sample(cpu, time);
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index be42f10..1b9bcd7 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -123,7 +123,7 @@
priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk");
if (IS_ERR(priv.cpu_clk)) {
- dev_err(priv.dev, "Unable to get cpuclk");
+ dev_err(priv.dev, "Unable to get cpuclk\n");
return PTR_ERR(priv.cpu_clk);
}
@@ -132,7 +132,7 @@
priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
if (IS_ERR(priv.ddr_clk)) {
- dev_err(priv.dev, "Unable to get ddrclk");
+ dev_err(priv.dev, "Unable to get ddrclk\n");
err = PTR_ERR(priv.ddr_clk);
goto out_cpu;
}
@@ -142,7 +142,7 @@
priv.powersave_clk = of_clk_get_by_name(np, "powersave");
if (IS_ERR(priv.powersave_clk)) {
- dev_err(priv.dev, "Unable to get powersave");
+ dev_err(priv.dev, "Unable to get powersave\n");
err = PTR_ERR(priv.powersave_clk);
goto out_ddr;
}
@@ -155,7 +155,7 @@
if (!err)
return 0;
- dev_err(priv.dev, "Failed to register cpufreq driver");
+ dev_err(priv.dev, "Failed to register cpufreq driver\n");
clk_disable_unprepare(priv.powersave_clk);
out_ddr:
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index e8a7bf5..ea7a4e1 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -105,7 +105,6 @@
static struct platform_driver scpi_cpufreq_platdrv = {
.driver = {
.name = "scpi-cpufreq",
- .owner = THIS_MODULE,
},
.probe = scpi_cpufreq_probe,
.remove = scpi_cpufreq_remove,
diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c
index 0404203..b366e6d 100644
--- a/drivers/cpufreq/sti-cpufreq.c
+++ b/drivers/cpufreq/sti-cpufreq.c
@@ -163,7 +163,7 @@
reg_fields = sti_cpufreq_match();
if (!reg_fields) {
- dev_err(dev, "This SoC doesn't support voltage scaling");
+ dev_err(dev, "This SoC doesn't support voltage scaling\n");
return -ENODEV;
}
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index d5657d5..71e586d 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -749,65 +749,52 @@
put_cpu();
}
-/**
- * cpuidle_coupled_cpu_notify - notifier called during hotplug transitions
- * @nb: notifier block
- * @action: hotplug transition
- * @hcpu: target cpu number
- *
- * Called when a cpu is brought on or offline using hotplug. Updates the
- * coupled cpu set appropriately
- */
-static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
- unsigned long action, void *hcpu)
+static int coupled_cpu_online(unsigned int cpu)
{
- int cpu = (unsigned long)hcpu;
struct cpuidle_device *dev;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- case CPU_DOWN_PREPARE:
- case CPU_ONLINE:
- case CPU_DEAD:
- case CPU_UP_CANCELED:
- case CPU_DOWN_FAILED:
- break;
- default:
- return NOTIFY_OK;
- }
-
mutex_lock(&cpuidle_lock);
dev = per_cpu(cpuidle_devices, cpu);
- if (!dev || !dev->coupled)
- goto out;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- case CPU_DOWN_PREPARE:
- cpuidle_coupled_prevent_idle(dev->coupled);
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
+ if (dev && dev->coupled) {
cpuidle_coupled_update_online_cpus(dev->coupled);
- /* Fall through */
- case CPU_UP_CANCELED:
- case CPU_DOWN_FAILED:
cpuidle_coupled_allow_idle(dev->coupled);
- break;
}
-out:
mutex_unlock(&cpuidle_lock);
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block cpuidle_coupled_cpu_notifier = {
- .notifier_call = cpuidle_coupled_cpu_notify,
-};
+static int coupled_cpu_up_prepare(unsigned int cpu)
+{
+ struct cpuidle_device *dev;
+
+ mutex_lock(&cpuidle_lock);
+
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (dev && dev->coupled)
+ cpuidle_coupled_prevent_idle(dev->coupled);
+
+ mutex_unlock(&cpuidle_lock);
+ return 0;
+}
static int __init cpuidle_coupled_init(void)
{
- return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
+ int ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE,
+ "cpuidle/coupled:prepare",
+ coupled_cpu_up_prepare,
+ coupled_cpu_online);
+ if (ret)
+ return ret;
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/coupled:online",
+ coupled_cpu_online,
+ coupled_cpu_up_prepare);
+ if (ret < 0)
+ cpuhp_remove_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE);
+ return ret;
}
core_initcall(cpuidle_coupled_init);
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 4ba3d3f..f440d38 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -121,6 +121,7 @@
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev) {
pr_err("Failed to allocate cpuidle device\n");
+ ret = -ENOMEM;
goto out_fail;
}
dev->cpu = cpu;
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index f7ca891..7fe442c 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -119,39 +119,29 @@
.enter = snooze_loop },
};
-static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
- unsigned long action, void *hcpu)
+static int powernv_cpuidle_cpu_online(unsigned int cpu)
{
- int hotcpu = (unsigned long)hcpu;
- struct cpuidle_device *dev =
- per_cpu(cpuidle_devices, hotcpu);
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
if (dev && cpuidle_get_driver()) {
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- cpuidle_pause_and_lock();
- cpuidle_enable_device(dev);
- cpuidle_resume_and_unlock();
- break;
-
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- cpuidle_pause_and_lock();
- cpuidle_disable_device(dev);
- cpuidle_resume_and_unlock();
- break;
-
- default:
- return NOTIFY_DONE;
- }
+ cpuidle_pause_and_lock();
+ cpuidle_enable_device(dev);
+ cpuidle_resume_and_unlock();
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block setup_hotplug_notifier = {
- .notifier_call = powernv_cpuidle_add_cpu_notifier,
-};
+static int powernv_cpuidle_cpu_dead(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_disable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
/*
* powernv_cpuidle_driver_init()
@@ -355,7 +345,14 @@
return retval;
}
- register_cpu_notifier(&setup_hotplug_notifier);
+ retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/powernv:online",
+ powernv_cpuidle_cpu_online, NULL);
+ WARN_ON(retval < 0);
+ retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+ "cpuidle/powernv:dead", NULL,
+ powernv_cpuidle_cpu_dead);
+ WARN_ON(retval < 0);
printk(KERN_DEBUG "powernv_idle_driver registered\n");
return 0;
}
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 07135e0..166ccd7 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -171,39 +171,29 @@
.enter = &shared_cede_loop },
};
-static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
- unsigned long action, void *hcpu)
+static int pseries_cpuidle_cpu_online(unsigned int cpu)
{
- int hotcpu = (unsigned long)hcpu;
- struct cpuidle_device *dev =
- per_cpu(cpuidle_devices, hotcpu);
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
if (dev && cpuidle_get_driver()) {
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- cpuidle_pause_and_lock();
- cpuidle_enable_device(dev);
- cpuidle_resume_and_unlock();
- break;
-
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- cpuidle_pause_and_lock();
- cpuidle_disable_device(dev);
- cpuidle_resume_and_unlock();
- break;
-
- default:
- return NOTIFY_DONE;
- }
+ cpuidle_pause_and_lock();
+ cpuidle_enable_device(dev);
+ cpuidle_resume_and_unlock();
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block setup_hotplug_notifier = {
- .notifier_call = pseries_cpuidle_add_cpu_notifier,
-};
+static int pseries_cpuidle_cpu_dead(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_disable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
/*
* pseries_cpuidle_driver_init()
@@ -273,7 +263,14 @@
return retval;
}
- register_cpu_notifier(&setup_hotplug_notifier);
+ retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/pseries:online",
+ pseries_cpuidle_cpu_online, NULL);
+ WARN_ON(retval < 0);
+ retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+ "cpuidle/pseries:DEAD", NULL,
+ pseries_cpuidle_cpu_dead);
+ WARN_ON(retval < 0);
printk(KERN_DEBUG "pseries_idle_driver registered\n");
return 0;
}
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index a5be56e..41254e7 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -76,7 +76,7 @@
config ARM_EXYNOS_BUS_DEVFREQ
tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
- depends on ARCH_EXYNOS
+ depends on ARCH_EXYNOS || COMPILE_TEST
select DEVFREQ_GOV_SIMPLE_ONDEMAND
select DEVFREQ_GOV_PASSIVE
select DEVFREQ_EVENT_EXYNOS_PPMU
@@ -91,14 +91,26 @@
This does not yet operate with optimal voltages.
config ARM_TEGRA_DEVFREQ
- tristate "Tegra DEVFREQ Driver"
- depends on ARCH_TEGRA_124_SOC
- select DEVFREQ_GOV_SIMPLE_ONDEMAND
- select PM_OPP
- help
- This adds the DEVFREQ driver for the Tegra family of SoCs.
- It reads ACTMON counters of memory controllers and adjusts the
- operating frequencies and voltages with OPP support.
+ tristate "Tegra DEVFREQ Driver"
+ depends on ARCH_TEGRA_124_SOC
+ select DEVFREQ_GOV_SIMPLE_ONDEMAND
+ select PM_OPP
+ help
+ This adds the DEVFREQ driver for the Tegra family of SoCs.
+ It reads ACTMON counters of memory controllers and adjusts the
+ operating frequencies and voltages with OPP support.
+
+config ARM_RK3399_DMC_DEVFREQ
+ tristate "ARM RK3399 DMC DEVFREQ Driver"
+ depends on ARCH_ROCKCHIP
+ select DEVFREQ_EVENT_ROCKCHIP_DFI
+ select DEVFREQ_GOV_SIMPLE_ONDEMAND
+ select PM_DEVFREQ_EVENT
+ select PM_OPP
+ help
+ This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller).
+ It sets the frequency for the memory controller and reads the usage counts
+ from hardware.
source "drivers/devfreq/event/Kconfig"
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 09f11d9..fbff40a 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -8,6 +8,7 @@
# DEVFREQ Drivers
obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
+obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o
obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra-devfreq.o
# DEVFREQ Event Drivers
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index eb6f74a..0fdae86 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -15,7 +15,7 @@
config DEVFREQ_EVENT_EXYNOS_NOCP
tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
- depends on ARCH_EXYNOS
+ depends on ARCH_EXYNOS || COMPILE_TEST
select PM_OPP
help
This add the devfreq-event driver for Exynos SoC. It provides NoC
@@ -23,11 +23,18 @@
config DEVFREQ_EVENT_EXYNOS_PPMU
tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
- depends on ARCH_EXYNOS
+ depends on ARCH_EXYNOS || COMPILE_TEST
select PM_OPP
help
This add the devfreq-event driver for Exynos SoC. It provides PPMU
(Platform Performance Monitoring Unit) counters to estimate the
utilization of each module.
+config DEVFREQ_EVENT_ROCKCHIP_DFI
+ tristate "ROCKCHIP DFI DEVFREQ event Driver"
+ depends on ARCH_ROCKCHIP
+ help
+ This add the devfreq-event driver for Rockchip SoC. It provides DFI
+ (DDR Monitor Module) driver to count ddr load.
+
endif # PM_DEVFREQ_EVENT
diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile
index 3d6afd3..dda7090 100644
--- a/drivers/devfreq/event/Makefile
+++ b/drivers/devfreq/event/Makefile
@@ -2,3 +2,4 @@
obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o
obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o
+obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI) += rockchip-dfi.o
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 845bf25..f55cf0e 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -406,8 +406,6 @@
of_property_read_string(node, "event-name", &desc[j].name);
j++;
-
- of_node_put(node);
}
info->desc = desc;
diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c
new file mode 100644
index 0000000..43fcc5a
--- /dev/null
+++ b/drivers/devfreq/event/rockchip-dfi.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq-event.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/of.h>
+
+#define RK3399_DMC_NUM_CH 2
+
+/* DDRMON_CTRL */
+#define DDRMON_CTRL 0x04
+#define CLR_DDRMON_CTRL (0x1f0000 << 0)
+#define LPDDR4_EN (0x10001 << 4)
+#define HARDWARE_EN (0x10001 << 3)
+#define LPDDR3_EN (0x10001 << 2)
+#define SOFTWARE_EN (0x10001 << 1)
+#define SOFTWARE_DIS (0x10000 << 1)
+#define TIME_CNT_EN (0x10001 << 0)
+
+#define DDRMON_CH0_COUNT_NUM 0x28
+#define DDRMON_CH0_DFI_ACCESS_NUM 0x2c
+#define DDRMON_CH1_COUNT_NUM 0x3c
+#define DDRMON_CH1_DFI_ACCESS_NUM 0x40
+
+/* pmu grf */
+#define PMUGRF_OS_REG2 0x308
+#define DDRTYPE_SHIFT 13
+#define DDRTYPE_MASK 7
+
+enum {
+ DDR3 = 3,
+ LPDDR3 = 6,
+ LPDDR4 = 7,
+ UNUSED = 0xFF
+};
+
+struct dmc_usage {
+ u32 access;
+ u32 total;
+};
+
+/*
+ * The dfi controller can monitor DDR load. It has an upper and lower threshold
+ * for the operating points. Whenever the usage leaves these bounds an event is
+ * generated to indicate the DDR frequency should be changed.
+ */
+struct rockchip_dfi {
+ struct devfreq_event_dev *edev;
+ struct devfreq_event_desc *desc;
+ struct dmc_usage ch_usage[RK3399_DMC_NUM_CH];
+ struct device *dev;
+ void __iomem *regs;
+ struct regmap *regmap_pmu;
+ struct clk *clk;
+};
+
+static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+ void __iomem *dfi_regs = info->regs;
+ u32 val;
+ u32 ddr_type;
+
+ /* get ddr type */
+ regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val);
+ ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK;
+
+ /* clear DDRMON_CTRL setting */
+ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL);
+
+ /* set ddr type to dfi */
+ if (ddr_type == LPDDR3)
+ writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL);
+ else if (ddr_type == LPDDR4)
+ writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL);
+
+ /* enable count, use software mode */
+ writel_relaxed(SOFTWARE_EN, dfi_regs + DDRMON_CTRL);
+}
+
+static void rockchip_dfi_stop_hardware_counter(struct devfreq_event_dev *edev)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+ void __iomem *dfi_regs = info->regs;
+
+ writel_relaxed(SOFTWARE_DIS, dfi_regs + DDRMON_CTRL);
+}
+
+static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+ u32 tmp, max = 0;
+ u32 i, busier_ch = 0;
+ void __iomem *dfi_regs = info->regs;
+
+ rockchip_dfi_stop_hardware_counter(edev);
+
+ /* Find out which channel is busier */
+ for (i = 0; i < RK3399_DMC_NUM_CH; i++) {
+ info->ch_usage[i].access = readl_relaxed(dfi_regs +
+ DDRMON_CH0_DFI_ACCESS_NUM + i * 20) * 4;
+ info->ch_usage[i].total = readl_relaxed(dfi_regs +
+ DDRMON_CH0_COUNT_NUM + i * 20);
+ tmp = info->ch_usage[i].access;
+ if (tmp > max) {
+ busier_ch = i;
+ max = tmp;
+ }
+ }
+ rockchip_dfi_start_hardware_counter(edev);
+
+ return busier_ch;
+}
+
+static int rockchip_dfi_disable(struct devfreq_event_dev *edev)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+
+ rockchip_dfi_stop_hardware_counter(edev);
+ clk_disable_unprepare(info->clk);
+
+ return 0;
+}
+
+static int rockchip_dfi_enable(struct devfreq_event_dev *edev)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+ int ret;
+
+ ret = clk_prepare_enable(info->clk);
+ if (ret) {
+ dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ret);
+ return ret;
+ }
+
+ rockchip_dfi_start_hardware_counter(edev);
+ return 0;
+}
+
+static int rockchip_dfi_set_event(struct devfreq_event_dev *edev)
+{
+ return 0;
+}
+
+static int rockchip_dfi_get_event(struct devfreq_event_dev *edev,
+ struct devfreq_event_data *edata)
+{
+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+ int busier_ch;
+
+ busier_ch = rockchip_dfi_get_busier_ch(edev);
+
+ edata->load_count = info->ch_usage[busier_ch].access;
+ edata->total_count = info->ch_usage[busier_ch].total;
+
+ return 0;
+}
+
+static const struct devfreq_event_ops rockchip_dfi_ops = {
+ .disable = rockchip_dfi_disable,
+ .enable = rockchip_dfi_enable,
+ .get_event = rockchip_dfi_get_event,
+ .set_event = rockchip_dfi_set_event,
+};
+
+static const struct of_device_id rockchip_dfi_id_match[] = {
+ { .compatible = "rockchip,rk3399-dfi" },
+ { },
+};
+
+static int rockchip_dfi_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rockchip_dfi *data;
+ struct resource *res;
+ struct devfreq_event_desc *desc;
+ struct device_node *np = pdev->dev.of_node, *node;
+
+ data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->regs = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(data->regs))
+ return PTR_ERR(data->regs);
+
+ data->clk = devm_clk_get(dev, "pclk_ddr_mon");
+ if (IS_ERR(data->clk)) {
+ dev_err(dev, "Cannot get the clk dmc_clk\n");
+ return PTR_ERR(data->clk);
+ };
+
+ /* try to find the optional reference to the pmu syscon */
+ node = of_parse_phandle(np, "rockchip,pmu", 0);
+ if (node) {
+ data->regmap_pmu = syscon_node_to_regmap(node);
+ if (IS_ERR(data->regmap_pmu))
+ return PTR_ERR(data->regmap_pmu);
+ }
+ data->dev = dev;
+
+ desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ desc->ops = &rockchip_dfi_ops;
+ desc->driver_data = data;
+ desc->name = np->name;
+ data->desc = desc;
+
+ data->edev = devm_devfreq_event_add_edev(&pdev->dev, desc);
+ if (IS_ERR(data->edev)) {
+ dev_err(&pdev->dev,
+ "failed to add devfreq-event device\n");
+ return PTR_ERR(data->edev);
+ }
+
+ platform_set_drvdata(pdev, data);
+
+ return 0;
+}
+
+static struct platform_driver rockchip_dfi_driver = {
+ .probe = rockchip_dfi_probe,
+ .driver = {
+ .name = "rockchip-dfi",
+ .of_match_table = rockchip_dfi_id_match,
+ },
+};
+module_platform_driver(rockchip_dfi_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip DFI driver");
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
new file mode 100644
index 0000000..e24b73d
--- /dev/null
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd.
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq-event.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/rwsem.h>
+#include <linux/suspend.h>
+
+#include <soc/rockchip/rockchip_sip.h>
+
+struct dram_timing {
+ unsigned int ddr3_speed_bin;
+ unsigned int pd_idle;
+ unsigned int sr_idle;
+ unsigned int sr_mc_gate_idle;
+ unsigned int srpd_lite_idle;
+ unsigned int standby_idle;
+ unsigned int auto_pd_dis_freq;
+ unsigned int dram_dll_dis_freq;
+ unsigned int phy_dll_dis_freq;
+ unsigned int ddr3_odt_dis_freq;
+ unsigned int ddr3_drv;
+ unsigned int ddr3_odt;
+ unsigned int phy_ddr3_ca_drv;
+ unsigned int phy_ddr3_dq_drv;
+ unsigned int phy_ddr3_odt;
+ unsigned int lpddr3_odt_dis_freq;
+ unsigned int lpddr3_drv;
+ unsigned int lpddr3_odt;
+ unsigned int phy_lpddr3_ca_drv;
+ unsigned int phy_lpddr3_dq_drv;
+ unsigned int phy_lpddr3_odt;
+ unsigned int lpddr4_odt_dis_freq;
+ unsigned int lpddr4_drv;
+ unsigned int lpddr4_dq_odt;
+ unsigned int lpddr4_ca_odt;
+ unsigned int phy_lpddr4_ca_drv;
+ unsigned int phy_lpddr4_ck_cs_drv;
+ unsigned int phy_lpddr4_dq_drv;
+ unsigned int phy_lpddr4_odt;
+};
+
+struct rk3399_dmcfreq {
+ struct device *dev;
+ struct devfreq *devfreq;
+ struct devfreq_simple_ondemand_data ondemand_data;
+ struct clk *dmc_clk;
+ struct devfreq_event_dev *edev;
+ struct mutex lock;
+ struct dram_timing timing;
+
+ /*
+ * DDR Converser of Frequency (DCF) is used to implement DDR frequency
+ * conversion without the participation of CPU, we will implement and
+ * control it in arm trust firmware.
+ */
+ wait_queue_head_t wait_dcf_queue;
+ int irq;
+ int wait_dcf_flag;
+ struct regulator *vdd_center;
+ unsigned long rate, target_rate;
+ unsigned long volt, target_volt;
+ struct dev_pm_opp *curr_opp;
+};
+
+static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+ struct dev_pm_opp *opp;
+ unsigned long old_clk_rate = dmcfreq->rate;
+ unsigned long target_volt, target_rate;
+ int err;
+
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(opp)) {
+ rcu_read_unlock();
+ return PTR_ERR(opp);
+ }
+
+ target_rate = dev_pm_opp_get_freq(opp);
+ target_volt = dev_pm_opp_get_voltage(opp);
+
+ dmcfreq->rate = dev_pm_opp_get_freq(dmcfreq->curr_opp);
+ dmcfreq->volt = dev_pm_opp_get_voltage(dmcfreq->curr_opp);
+
+ rcu_read_unlock();
+
+ if (dmcfreq->rate == target_rate)
+ return 0;
+
+ mutex_lock(&dmcfreq->lock);
+
+ /*
+ * If frequency scaling from low to high, adjust voltage first.
+ * If frequency scaling from high to low, adjust frequency first.
+ */
+ if (old_clk_rate < target_rate) {
+ err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
+ target_volt);
+ if (err) {
+ dev_err(dev, "Cannot to set voltage %lu uV\n",
+ target_volt);
+ goto out;
+ }
+ }
+ dmcfreq->wait_dcf_flag = 1;
+
+ err = clk_set_rate(dmcfreq->dmc_clk, target_rate);
+ if (err) {
+ dev_err(dev, "Cannot to set frequency %lu (%d)\n",
+ target_rate, err);
+ regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
+ dmcfreq->volt);
+ goto out;
+ }
+
+ /*
+ * Wait until bcf irq happen, it means freq scaling finish in
+ * arm trust firmware, use 100ms as timeout time.
+ */
+ if (!wait_event_timeout(dmcfreq->wait_dcf_queue,
+ !dmcfreq->wait_dcf_flag, HZ / 10))
+ dev_warn(dev, "Timeout waiting for dcf interrupt\n");
+
+ /*
+ * Check the dpll rate,
+ * There only two result we will get,
+ * 1. Ddr frequency scaling fail, we still get the old rate.
+ * 2. Ddr frequency scaling sucessful, we get the rate we set.
+ */
+ dmcfreq->rate = clk_get_rate(dmcfreq->dmc_clk);
+
+ /* If get the incorrect rate, set voltage to old value. */
+ if (dmcfreq->rate != target_rate) {
+ dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\
+ Current frequency %lu\n", target_rate, dmcfreq->rate);
+ regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
+ dmcfreq->volt);
+ goto out;
+ } else if (old_clk_rate > target_rate)
+ err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
+ target_volt);
+ if (err)
+ dev_err(dev, "Cannot to set vol %lu uV\n", target_volt);
+
+ dmcfreq->curr_opp = opp;
+out:
+ mutex_unlock(&dmcfreq->lock);
+ return err;
+}
+
+static int rk3399_dmcfreq_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *stat)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+ struct devfreq_event_data edata;
+ int ret = 0;
+
+ ret = devfreq_event_get_event(dmcfreq->edev, &edata);
+ if (ret < 0)
+ return ret;
+
+ stat->current_frequency = dmcfreq->rate;
+ stat->busy_time = edata.load_count;
+ stat->total_time = edata.total_count;
+
+ return ret;
+}
+
+static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+
+ *freq = dmcfreq->rate;
+
+ return 0;
+}
+
+static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = {
+ .polling_ms = 200,
+ .target = rk3399_dmcfreq_target,
+ .get_dev_status = rk3399_dmcfreq_get_dev_status,
+ .get_cur_freq = rk3399_dmcfreq_get_cur_freq,
+};
+
+static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = devfreq_event_disable_edev(dmcfreq->edev);
+ if (ret < 0) {
+ dev_err(dev, "failed to disable the devfreq-event devices\n");
+ return ret;
+ }
+
+ ret = devfreq_suspend_device(dmcfreq->devfreq);
+ if (ret < 0) {
+ dev_err(dev, "failed to suspend the devfreq devices\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = devfreq_event_enable_edev(dmcfreq->edev);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable the devfreq-event devices\n");
+ return ret;
+ }
+
+ ret = devfreq_resume_device(dmcfreq->devfreq);
+ if (ret < 0) {
+ dev_err(dev, "failed to resume the devfreq devices\n");
+ return ret;
+ }
+ return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
+ rk3399_dmcfreq_resume);
+
+static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_id;
+ struct arm_smccc_res res;
+
+ dmcfreq->wait_dcf_flag = 0;
+ wake_up(&dmcfreq->wait_dcf_queue);
+
+ /* Clear the DCF interrupt */
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
+ ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ,
+ 0, 0, 0, 0, &res);
+
+ return IRQ_HANDLED;
+}
+
+static int of_get_ddr_timings(struct dram_timing *timing,
+ struct device_node *np)
+{
+ int ret = 0;
+
+ ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin",
+ &timing->ddr3_speed_bin);
+ ret |= of_property_read_u32(np, "rockchip,pd_idle",
+ &timing->pd_idle);
+ ret |= of_property_read_u32(np, "rockchip,sr_idle",
+ &timing->sr_idle);
+ ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle",
+ &timing->sr_mc_gate_idle);
+ ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle",
+ &timing->srpd_lite_idle);
+ ret |= of_property_read_u32(np, "rockchip,standby_idle",
+ &timing->standby_idle);
+ ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq",
+ &timing->auto_pd_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq",
+ &timing->dram_dll_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq",
+ &timing->phy_dll_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq",
+ &timing->ddr3_odt_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,ddr3_drv",
+ &timing->ddr3_drv);
+ ret |= of_property_read_u32(np, "rockchip,ddr3_odt",
+ &timing->ddr3_odt);
+ ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv",
+ &timing->phy_ddr3_ca_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv",
+ &timing->phy_ddr3_dq_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt",
+ &timing->phy_ddr3_odt);
+ ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq",
+ &timing->lpddr3_odt_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,lpddr3_drv",
+ &timing->lpddr3_drv);
+ ret |= of_property_read_u32(np, "rockchip,lpddr3_odt",
+ &timing->lpddr3_odt);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv",
+ &timing->phy_lpddr3_ca_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv",
+ &timing->phy_lpddr3_dq_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt",
+ &timing->phy_lpddr3_odt);
+ ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq",
+ &timing->lpddr4_odt_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,lpddr4_drv",
+ &timing->lpddr4_drv);
+ ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt",
+ &timing->lpddr4_dq_odt);
+ ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt",
+ &timing->lpddr4_ca_odt);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv",
+ &timing->phy_lpddr4_ca_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv",
+ &timing->phy_lpddr4_ck_cs_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv",
+ &timing->phy_lpddr4_dq_drv);
+ ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt",
+ &timing->phy_lpddr4_odt);
+
+ return ret;
+}
+
+static int rk3399_dmcfreq_probe(struct platform_device *pdev)
+{
+ struct arm_smccc_res res;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = pdev->dev.of_node;
+ struct rk3399_dmcfreq *data;
+ int ret, irq, index, size;
+ uint32_t *timing;
+ struct dev_pm_opp *opp;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "Cannot get the dmc interrupt resource\n");
+ return -EINVAL;
+ }
+ data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ mutex_init(&data->lock);
+
+ data->vdd_center = devm_regulator_get(dev, "center");
+ if (IS_ERR(data->vdd_center)) {
+ dev_err(dev, "Cannot get the regulator \"center\"\n");
+ return PTR_ERR(data->vdd_center);
+ }
+
+ data->dmc_clk = devm_clk_get(dev, "dmc_clk");
+ if (IS_ERR(data->dmc_clk)) {
+ dev_err(dev, "Cannot get the clk dmc_clk\n");
+ return PTR_ERR(data->dmc_clk);
+ };
+
+ data->irq = irq;
+ ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0,
+ dev_name(dev), data);
+ if (ret) {
+ dev_err(dev, "Failed to request dmc irq: %d\n", ret);
+ return ret;
+ }
+
+ init_waitqueue_head(&data->wait_dcf_queue);
+ data->wait_dcf_flag = 0;
+
+ data->edev = devfreq_event_get_edev_by_phandle(dev, 0);
+ if (IS_ERR(data->edev))
+ return -EPROBE_DEFER;
+
+ ret = devfreq_event_enable_edev(data->edev);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable devfreq-event devices\n");
+ return ret;
+ }
+
+ /*
+ * Get dram timing and pass it to arm trust firmware,
+ * the dram drvier in arm trust firmware will get these
+ * timing and to do dram initial.
+ */
+ if (!of_get_ddr_timings(&data->timing, np)) {
+ timing = &data->timing.ddr3_speed_bin;
+ size = sizeof(struct dram_timing) / 4;
+ for (index = 0; index < size; index++) {
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index,
+ ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM,
+ 0, 0, 0, 0, &res);
+ if (res.a0) {
+ dev_err(dev, "Failed to set dram param: %ld\n",
+ res.a0);
+ return -EINVAL;
+ }
+ }
+ }
+
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
+ ROCKCHIP_SIP_CONFIG_DRAM_INIT,
+ 0, 0, 0, 0, &res);
+
+ /*
+ * We add a devfreq driver to our parent since it has a device tree node
+ * with operating points.
+ */
+ if (dev_pm_opp_of_add_table(dev)) {
+ dev_err(dev, "Invalid operating-points in device tree.\n");
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ of_property_read_u32(np, "upthreshold",
+ &data->ondemand_data.upthreshold);
+ of_property_read_u32(np, "downdifferential",
+ &data->ondemand_data.downdifferential);
+
+ data->rate = clk_get_rate(data->dmc_clk);
+
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(dev, &data->rate, 0);
+ if (IS_ERR(opp)) {
+ rcu_read_unlock();
+ return PTR_ERR(opp);
+ }
+ rcu_read_unlock();
+ data->curr_opp = opp;
+
+ rk3399_devfreq_dmc_profile.initial_freq = data->rate;
+
+ data->devfreq = devfreq_add_device(dev,
+ &rk3399_devfreq_dmc_profile,
+ "simple_ondemand",
+ &data->ondemand_data);
+ if (IS_ERR(data->devfreq))
+ return PTR_ERR(data->devfreq);
+ devm_devfreq_register_opp_notifier(dev, data->devfreq);
+
+ data->dev = dev;
+ platform_set_drvdata(pdev, data);
+
+ return 0;
+}
+
+static const struct of_device_id rk3399dmc_devfreq_of_match[] = {
+ { .compatible = "rockchip,rk3399-dmc" },
+ { },
+};
+
+static struct platform_driver rk3399_dmcfreq_driver = {
+ .probe = rk3399_dmcfreq_probe,
+ .driver = {
+ .name = "rk3399-dmc-freq",
+ .pm = &rk3399_dmcfreq_pm,
+ .of_match_table = rk3399dmc_devfreq_of_match,
+ },
+};
+module_platform_driver(rk3399_dmcfreq_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>");
+MODULE_DESCRIPTION("RK3399 dmcfreq driver with devfreq framework");
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index edf053f..da18b18 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -46,9 +46,9 @@
u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \
DW_DMA_MSIZE_16; \
u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \
- _dwc->p_master : _dwc->m_master; \
+ _dwc->dws.p_master : _dwc->dws.m_master; \
u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \
- _dwc->p_master : _dwc->m_master; \
+ _dwc->dws.p_master : _dwc->dws.m_master; \
\
(DWC_CTLL_DST_MSIZE(_dmsize) \
| DWC_CTLL_SRC_MSIZE(_smsize) \
@@ -143,12 +143,16 @@
struct dw_dma *dw = to_dw_dma(dwc->chan.device);
u32 cfghi = DWC_CFGH_FIFO_MODE;
u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+ bool hs_polarity = dwc->dws.hs_polarity;
if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
return;
- cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
- cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
+ cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+ cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+
+ /* Set polarity of handshake interface */
+ cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
channel_writel(dwc, CFG_LO, cfglo);
channel_writel(dwc, CFG_HI, cfghi);
@@ -209,7 +213,7 @@
static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
{
struct dw_dma *dw = to_dw_dma(dwc->chan.device);
- u8 lms = DWC_LLP_LMS(dwc->m_master);
+ u8 lms = DWC_LLP_LMS(dwc->dws.m_master);
unsigned long was_soft_llp;
/* ASSERT: channel is idle */
@@ -662,7 +666,7 @@
struct dw_desc *prev;
size_t xfer_count;
size_t offset;
- u8 m_master = dwc->m_master;
+ u8 m_master = dwc->dws.m_master;
unsigned int src_width;
unsigned int dst_width;
unsigned int data_width = dw->pdata->data_width[m_master];
@@ -740,7 +744,7 @@
struct dw_desc *prev;
struct dw_desc *first;
u32 ctllo;
- u8 m_master = dwc->m_master;
+ u8 m_master = dwc->dws.m_master;
u8 lms = DWC_LLP_LMS(m_master);
dma_addr_t reg;
unsigned int reg_width;
@@ -895,12 +899,7 @@
return false;
/* We have to copy data since dws can be temporary storage */
-
- dwc->src_id = dws->src_id;
- dwc->dst_id = dws->dst_id;
-
- dwc->m_master = dws->m_master;
- dwc->p_master = dws->p_master;
+ memcpy(&dwc->dws, dws, sizeof(struct dw_dma_slave));
return true;
}
@@ -1167,11 +1166,7 @@
spin_lock_irqsave(&dwc->lock, flags);
/* Clear custom channel configuration */
- dwc->src_id = 0;
- dwc->dst_id = 0;
-
- dwc->m_master = 0;
- dwc->p_master = 0;
+ memset(&dwc->dws, 0, sizeof(struct dw_dma_slave));
clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
@@ -1264,7 +1259,7 @@
struct dw_cyclic_desc *retval = NULL;
struct dw_desc *desc;
struct dw_desc *last = NULL;
- u8 lms = DWC_LLP_LMS(dwc->m_master);
+ u8 lms = DWC_LLP_LMS(dwc->dws.m_master);
unsigned long was_cyclic;
unsigned int reg_width;
unsigned int periods;
@@ -1576,11 +1571,7 @@
(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
} else {
dwc->block_size = pdata->block_size;
-
- /* Check if channel supports multi block transfer */
- channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
- dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
- channel_writel(dwc, LLP, 0);
+ dwc->nollp = pdata->is_nollp;
}
}
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 4b7bd78..f65dd10 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -245,10 +245,7 @@
bool nollp;
/* custom slave configuration */
- u8 src_id;
- u8 dst_id;
- u8 m_master;
- u8 p_master;
+ struct dw_dma_slave dws;
/* configuration passed via .device_config */
struct dma_slave_config dma_sconfig;
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index c5f21ef..29d04ca 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -200,10 +200,9 @@
* is not a normal timeout interrupt, ie. hsu_dma_get_status() returned 0.
*
* Return:
- * IRQ_NONE for invalid channel number, IRQ_HANDLED otherwise.
+ * 0 for invalid channel number, 1 otherwise.
*/
-irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
- u32 status)
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status)
{
struct hsu_dma_chan *hsuc;
struct hsu_dma_desc *desc;
@@ -211,7 +210,7 @@
/* Sanity check */
if (nr >= chip->hsu->nr_channels)
- return IRQ_NONE;
+ return 0;
hsuc = &chip->hsu->chan[nr];
@@ -230,7 +229,7 @@
}
spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
- return IRQ_HANDLED;
+ return 1;
}
EXPORT_SYMBOL_GPL(hsu_dma_do_irq);
diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
index 9916058..b51639f 100644
--- a/drivers/dma/hsu/pci.c
+++ b/drivers/dma/hsu/pci.c
@@ -29,7 +29,7 @@
u32 dmaisr;
u32 status;
unsigned short i;
- irqreturn_t ret = IRQ_NONE;
+ int ret = 0;
int err;
dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
@@ -37,14 +37,14 @@
if (dmaisr & 0x1) {
err = hsu_dma_get_status(chip, i, &status);
if (err > 0)
- ret |= IRQ_HANDLED;
+ ret |= 1;
else if (err == 0)
ret |= hsu_dma_do_irq(chip, i, status);
}
dmaisr >>= 1;
}
- return ret;
+ return IRQ_RETVAL(ret);
}
static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 03ec76f..3cb4738 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -648,15 +648,11 @@
writel_relaxed(val, sdma->regs + chnenbl);
}
-static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
-{
- if (sdmac->desc.callback)
- sdmac->desc.callback(sdmac->desc.callback_param);
-}
-
static void sdma_update_channel_loop(struct sdma_channel *sdmac)
{
struct sdma_buffer_descriptor *bd;
+ int error = 0;
+ enum dma_status old_status = sdmac->status;
/*
* loop mode. Iterate over descriptors, re-setup them and
@@ -668,17 +664,42 @@
if (bd->mode.status & BD_DONE)
break;
- if (bd->mode.status & BD_RROR)
+ if (bd->mode.status & BD_RROR) {
+ bd->mode.status &= ~BD_RROR;
sdmac->status = DMA_ERROR;
+ error = -EIO;
+ }
+ /*
+ * We use bd->mode.count to calculate the residue, since contains
+ * the number of bytes present in the current buffer descriptor.
+ */
+
+ sdmac->chn_real_count = bd->mode.count;
bd->mode.status |= BD_DONE;
+ bd->mode.count = sdmac->period_len;
+
+ /*
+ * The callback is called from the interrupt context in order
+ * to reduce latency and to avoid the risk of altering the
+ * SDMA transaction status by the time the client tasklet is
+ * executed.
+ */
+
+ if (sdmac->desc.callback)
+ sdmac->desc.callback(sdmac->desc.callback_param);
+
sdmac->buf_tail++;
sdmac->buf_tail %= sdmac->num_bd;
+
+ if (error)
+ sdmac->status = old_status;
}
}
-static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+static void mxc_sdma_handle_channel_normal(unsigned long data)
{
+ struct sdma_channel *sdmac = (struct sdma_channel *) data;
struct sdma_buffer_descriptor *bd;
int i, error = 0;
@@ -705,16 +726,6 @@
sdmac->desc.callback(sdmac->desc.callback_param);
}
-static void sdma_tasklet(unsigned long data)
-{
- struct sdma_channel *sdmac = (struct sdma_channel *) data;
-
- if (sdmac->flags & IMX_DMA_SG_LOOP)
- sdma_handle_channel_loop(sdmac);
- else
- mxc_sdma_handle_channel_normal(sdmac);
-}
-
static irqreturn_t sdma_int_handler(int irq, void *dev_id)
{
struct sdma_engine *sdma = dev_id;
@@ -731,8 +742,8 @@
if (sdmac->flags & IMX_DMA_SG_LOOP)
sdma_update_channel_loop(sdmac);
-
- tasklet_schedule(&sdmac->tasklet);
+ else
+ tasklet_schedule(&sdmac->tasklet);
__clear_bit(channel, &stat);
}
@@ -1353,7 +1364,8 @@
u32 residue;
if (sdmac->flags & IMX_DMA_SG_LOOP)
- residue = (sdmac->num_bd - sdmac->buf_tail) * sdmac->period_len;
+ residue = (sdmac->num_bd - sdmac->buf_tail) *
+ sdmac->period_len - sdmac->chn_real_count;
else
residue = sdmac->chn_count - sdmac->chn_real_count;
@@ -1732,7 +1744,7 @@
dma_cookie_init(&sdmac->chan);
sdmac->channel = i;
- tasklet_init(&sdmac->tasklet, sdma_tasklet,
+ tasklet_init(&sdmac->tasklet, mxc_sdma_handle_channel_normal,
(unsigned long) sdmac);
/*
* Add the channel to the DMAC list. Do not add channel 0 though
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 9b6800a..daaac2c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -148,12 +148,12 @@
};
/* Scalable MCA error strings */
-static const char * const f17h_ls_mce_desc[] = {
+static const char * const smca_ls_mce_desc[] = {
"Load queue parity",
"Store queue parity",
"Miss address buffer payload parity",
"L1 TLB parity",
- "", /* reserved */
+ "Reserved",
"DC tag error type 6",
"DC tag error type 1",
"Internal error type 1",
@@ -172,7 +172,7 @@
"L2 fill data error",
};
-static const char * const f17h_if_mce_desc[] = {
+static const char * const smca_if_mce_desc[] = {
"microtag probe port parity error",
"IC microtag or full tag multi-hit error",
"IC full tag parity",
@@ -185,19 +185,22 @@
"BPQ snoop parity on Thread 1",
"L1 BTB multi-match error",
"L2 BTB multi-match error",
+ "L2 Cache Response Poison error",
+ "System Read Data error",
};
-static const char * const f17h_l2_mce_desc[] = {
+static const char * const smca_l2_mce_desc[] = {
"L2M tag multi-way-hit error",
"L2M tag ECC error",
"L2M data ECC error",
"HW assert",
};
-static const char * const f17h_de_mce_desc[] = {
+static const char * const smca_de_mce_desc[] = {
"uop cache tag parity error",
"uop cache data parity error",
"Insn buffer parity error",
+ "uop queue parity error",
"Insn dispatch queue parity error",
"Fetch address FIFO parity",
"Patch RAM data parity",
@@ -205,7 +208,7 @@
"uop buffer parity"
};
-static const char * const f17h_ex_mce_desc[] = {
+static const char * const smca_ex_mce_desc[] = {
"Watchdog timeout error",
"Phy register file parity",
"Flag register file parity",
@@ -214,18 +217,22 @@
"EX payload parity",
"Checkpoint queue parity",
"Retire dispatch queue parity",
+ "Retire status queue parity error",
+ "Scheduling queue parity error",
+ "Branch buffer queue parity error",
};
-static const char * const f17h_fp_mce_desc[] = {
+static const char * const smca_fp_mce_desc[] = {
"Physical register file parity",
"Freelist parity error",
"Schedule queue parity",
"NSQ parity error",
"Retire queue parity",
"Status register file parity",
+ "Hardware assertion",
};
-static const char * const f17h_l3_mce_desc[] = {
+static const char * const smca_l3_mce_desc[] = {
"Shadow tag macro ECC error",
"Shadow tag macro multi-way-hit error",
"L3M tag ECC error",
@@ -236,7 +243,7 @@
"L3 HW assert",
};
-static const char * const f17h_cs_mce_desc[] = {
+static const char * const smca_cs_mce_desc[] = {
"Illegal request from transport layer",
"Address violation",
"Security violation",
@@ -248,14 +255,14 @@
"ECC error on probe filter access",
};
-static const char * const f17h_pie_mce_desc[] = {
+static const char * const smca_pie_mce_desc[] = {
"HW assert",
"Internal PIE register security violation",
"Error on GMI link",
"Poison data written to internal PIE register",
};
-static const char * const f17h_umc_mce_desc[] = {
+static const char * const smca_umc_mce_desc[] = {
"DRAM ECC error",
"Data poison error on DRAM",
"SDP parity error",
@@ -264,18 +271,39 @@
"Write data CRC error",
};
-static const char * const f17h_pb_mce_desc[] = {
+static const char * const smca_pb_mce_desc[] = {
"Parameter Block RAM ECC error",
};
-static const char * const f17h_psp_mce_desc[] = {
+static const char * const smca_psp_mce_desc[] = {
"PSP RAM ECC or parity error",
};
-static const char * const f17h_smu_mce_desc[] = {
+static const char * const smca_smu_mce_desc[] = {
"SMU RAM ECC or parity error",
};
+struct smca_mce_desc {
+ const char * const *descs;
+ unsigned int num_descs;
+};
+
+static struct smca_mce_desc smca_mce_descs[] = {
+ [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
+ [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
+ [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
+ [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
+ [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
+ [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
+ [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
+ [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
+ [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
+ [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
+ [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
+ [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
+ [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
+};
+
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@@ -820,175 +848,35 @@
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
- unsigned int mca_type)
-{
- const char * const *error_desc_array;
- size_t len;
-
- pr_emerg(HW_ERR "%s Error: ", ip_name);
-
- switch (mca_type) {
- case SMCA_LS:
- error_desc_array = f17h_ls_mce_desc;
- len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
-
- if (xec == 0x4) {
- pr_cont("Unrecognized LS MCA error code.\n");
- return;
- }
- break;
-
- case SMCA_IF:
- error_desc_array = f17h_if_mce_desc;
- len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
- break;
-
- case SMCA_L2_CACHE:
- error_desc_array = f17h_l2_mce_desc;
- len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
- break;
-
- case SMCA_DE:
- error_desc_array = f17h_de_mce_desc;
- len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
- break;
-
- case SMCA_EX:
- error_desc_array = f17h_ex_mce_desc;
- len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
- break;
-
- case SMCA_FP:
- error_desc_array = f17h_fp_mce_desc;
- len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
- break;
-
- case SMCA_L3_CACHE:
- error_desc_array = f17h_l3_mce_desc;
- len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
- break;
-
- default:
- pr_cont("Corrupted MCA core error info.\n");
- return;
- }
-
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n",
- amd_core_mcablock_names[mca_type]);
- return;
- }
-
- pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
- const char * const *error_desc_array;
- size_t len;
-
- pr_emerg(HW_ERR "Data Fabric Error: ");
-
- switch (mca_type) {
- case SMCA_CS:
- error_desc_array = f17h_cs_mce_desc;
- len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
- break;
-
- case SMCA_PIE:
- error_desc_array = f17h_pie_mce_desc;
- len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
- break;
-
- default:
- pr_cont("Corrupted MCA Data Fabric info.\n");
- return;
- }
-
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n",
- amd_df_mcablock_names[mca_type]);
- return;
- }
-
- pr_cont("%s.\n", error_desc_array[xec]);
-}
-
/* Decode errors according to Scalable MCA specification */
static void decode_smca_errors(struct mce *m)
{
- u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
- unsigned int hwid, mca_type, i;
- u8 xec = XEC(m->status, xec_mask);
- const char * const *error_desc_array;
+ struct smca_hwid_mcatype *type;
+ unsigned int bank_type;
const char *ip_name;
- u32 low, high;
- size_t len;
+ u8 xec = XEC(m->status, xec_mask);
- if (rdmsr_safe(addr, &low, &high)) {
- pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+ if (m->bank >= ARRAY_SIZE(smca_banks))
return;
- }
- hwid = high & MCI_IPID_HWID;
- mca_type = (high & MCI_IPID_MCATYPE) >> 16;
+ if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
+ pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
- pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
- /*
- * Based on hwid and mca_type values, decode errors from respective IPs.
- * Note: mca_type values make sense only in the context of an hwid.
- */
- for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
- if (amd_hwids[i].hwid == hwid)
- break;
-
- switch (i) {
- case SMCA_F17H_CORE:
- ip_name = (mca_type == SMCA_L3_CACHE) ?
- "L3 Cache" : "F17h Core";
- return decode_f17h_core_errors(ip_name, xec, mca_type);
- break;
-
- case SMCA_DF:
- return decode_df_errors(xec, mca_type);
- break;
-
- case SMCA_UMC:
- error_desc_array = f17h_umc_mce_desc;
- len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
- break;
-
- case SMCA_PB:
- error_desc_array = f17h_pb_mce_desc;
- len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
- break;
-
- case SMCA_PSP:
- error_desc_array = f17h_psp_mce_desc;
- len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
- break;
-
- case SMCA_SMU:
- error_desc_array = f17h_smu_mce_desc;
- len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
- break;
-
- default:
- pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+ type = smca_banks[m->bank].type;
+ if (!type)
return;
+
+ bank_type = type->bank_type;
+ ip_name = smca_bank_names[bank_type].long_name;
+
+ pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+
+ /* Only print the decode of valid error codes */
+ if (xec < smca_mce_descs[bank_type].num_descs &&
+ (type->xec_bitmap & BIT_ULL(xec))) {
+ pr_emerg(HW_ERR "%s Error: ", ip_name);
+ pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
}
-
- ip_name = amd_hwids[i].name;
- pr_emerg(HW_ERR "%s Error: ", ip_name);
-
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
- return;
- }
-
- pr_cont("%s.\n", error_desc_array[xec]);
}
static inline void amd_decode_err_code(u16 ec)
@@ -1078,6 +966,8 @@
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
+
if (!rdmsr_safe(addr, &low, &high) &&
(low & MCI_CONFIG_MCAX))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
@@ -1091,12 +981,20 @@
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
- pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
+ pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (m->status & MCI_STATUS_SYNDV)
+ pr_cont(", Syndrome: 0x%016llx", m->synd);
+
+ pr_cont(", IPID: 0x%016llx", m->ipid);
+
+ pr_cont("\n");
+
decode_smca_errors(m);
goto err_code;
- }
+ } else
+ pr_cont("\n");
if (!fam_ops)
goto err_code;
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 829eec8..2fe1a13 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
+#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -238,33 +239,14 @@
return count;
}
-/**
- * dcdbas_smi_request: generate SMI request
- *
- * Called with smi_data_lock.
- */
-int dcdbas_smi_request(struct smi_cmd *smi_cmd)
+static int raise_smi(void *par)
{
- cpumask_var_t old_mask;
- int ret = 0;
+ struct smi_cmd *smi_cmd = par;
- if (smi_cmd->magic != SMI_CMD_MAGIC) {
- dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
- __func__);
- return -EBADR;
- }
-
- /* SMI requires CPU 0 */
- if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
- return -ENOMEM;
-
- cpumask_copy(old_mask, ¤t->cpus_allowed);
- set_cpus_allowed_ptr(current, cpumask_of(0));
if (smp_processor_id() != 0) {
dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
__func__);
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
}
/* generate SMI */
@@ -280,9 +262,28 @@
: "memory"
);
-out:
- set_cpus_allowed_ptr(current, old_mask);
- free_cpumask_var(old_mask);
+ return 0;
+}
+/**
+ * dcdbas_smi_request: generate SMI request
+ *
+ * Called with smi_data_lock.
+ */
+int dcdbas_smi_request(struct smi_cmd *smi_cmd)
+{
+ int ret;
+
+ if (smi_cmd->magic != SMI_CMD_MAGIC) {
+ dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
+ __func__);
+ return -EBADR;
+ }
+
+ /* SMI requires CPU 0 */
+ get_online_cpus();
+ ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
+ put_online_cpus();
+
return ret;
}
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 6394152..c981be1 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -112,6 +112,23 @@
Most users should say N.
+config EFI_TEST
+ tristate "EFI Runtime Service Tests Support"
+ depends on EFI
+ default n
+ help
+ This driver uses the efi.<service> function pointers directly instead
+ of going through the efivar API, because it is not trying to test the
+ kernel subsystem, just for testing the UEFI runtime service
+ interfaces which are provided by the firmware. This driver is used
+ by the Firmware Test Suite (FWTS) for testing the UEFI runtime
+ interfaces readiness of the firmware.
+ Details for FWTS are available from:
+ <https://wiki.ubuntu.com/FirmwareTestSuite>
+
+ Say Y here to enable the runtime services support via /dev/efi_test.
+ If unsure, say N.
+
endmenu
config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index a219640..c8a439f 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -10,7 +10,7 @@
KASAN_SANITIZE_runtime-wrappers.o := n
obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o
-obj-$(CONFIG_EFI) += capsule.o
+obj-$(CONFIG_EFI) += capsule.o memmap.o
obj-$(CONFIG_EFI_VARS) += efivars.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
@@ -20,6 +20,7 @@
obj-$(CONFIG_EFI_STUB) += libstub/
obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o
obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o
+obj-$(CONFIG_EFI_TEST) += test/
arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index c49d50e..8efe130 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -26,9 +26,9 @@
u64 efi_system_table;
-static int __init is_normal_ram(efi_memory_desc_t *md)
+static int __init is_memory(efi_memory_desc_t *md)
{
- if (md->attribute & EFI_MEMORY_WB)
+ if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC))
return 1;
return 0;
}
@@ -152,9 +152,9 @@
}
/*
- * Return true for RAM regions we want to permanently reserve.
+ * Return true for regions that can be used as System RAM.
*/
-static __init int is_reserve_region(efi_memory_desc_t *md)
+static __init int is_usable_memory(efi_memory_desc_t *md)
{
switch (md->type) {
case EFI_LOADER_CODE:
@@ -163,18 +163,22 @@
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
case EFI_PERSISTENT_MEMORY:
- return 0;
+ /*
+ * According to the spec, these regions are no longer reserved
+ * after calling ExitBootServices(). However, we can only use
+ * them as System RAM if they can be mapped writeback cacheable.
+ */
+ return (md->attribute & EFI_MEMORY_WB);
default:
break;
}
- return is_normal_ram(md);
+ return false;
}
static __init void reserve_regions(void)
{
efi_memory_desc_t *md;
u64 paddr, npages, size;
- int resv;
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
@@ -191,32 +195,29 @@
paddr = md->phys_addr;
npages = md->num_pages;
- resv = is_reserve_region(md);
if (efi_enabled(EFI_DBG)) {
char buf[64];
- pr_info(" 0x%012llx-0x%012llx %s%s\n",
+ pr_info(" 0x%012llx-0x%012llx %s\n",
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
- efi_md_typeattr_format(buf, sizeof(buf), md),
- resv ? "*" : "");
+ efi_md_typeattr_format(buf, sizeof(buf), md));
}
memrange_efi_to_native(&paddr, &npages);
size = npages << PAGE_SHIFT;
- if (is_normal_ram(md))
+ if (is_memory(md)) {
early_init_dt_add_memory_arch(paddr, size);
- if (resv)
- memblock_mark_nomap(paddr, size);
-
+ if (!is_usable_memory(md))
+ memblock_mark_nomap(paddr, size);
+ }
}
-
- set_bit(EFI_MEMMAP, &efi.flags);
}
void __init efi_init(void)
{
+ struct efi_memory_map_data data;
struct efi_fdt_params params;
/* Grab UEFI information placed in FDT by stub */
@@ -225,9 +226,12 @@
efi_system_table = params.system_table;
- efi.memmap.phys_map = params.mmap;
- efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
- if (efi.memmap.map == NULL) {
+ data.desc_version = params.desc_ver;
+ data.desc_size = params.desc_size;
+ data.size = params.mmap_size;
+ data.phys_map = params.mmap;
+
+ if (efi_memmap_init_early(&data) < 0) {
/*
* If we are booting via UEFI, the UEFI memory map is the only
* description of memory we have, so there is little point in
@@ -235,9 +239,6 @@
*/
panic("Unable to map EFI memory map.\n");
}
- efi.memmap.map_end = efi.memmap.map + params.mmap_size;
- efi.memmap.desc_size = params.desc_size;
- efi.memmap.desc_version = params.desc_ver;
WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
@@ -248,7 +249,8 @@
reserve_regions();
efi_memattr_init();
- early_memunmap(efi.memmap.map, params.mmap_size);
+ efi_esrt_init();
+ efi_memmap_unmap();
memblock_reserve(params.mmap & PAGE_MASK,
PAGE_ALIGN(params.mmap_size +
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index c394b81..7c75a8d 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -39,6 +39,26 @@
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
};
+#ifdef CONFIG_ARM64_PTDUMP
+#include <asm/ptdump.h>
+
+static struct ptdump_info efi_ptdump_info = {
+ .mm = &efi_mm,
+ .markers = (struct addr_marker[]){
+ { 0, "UEFI runtime start" },
+ { TASK_SIZE_64, "UEFI runtime end" }
+ },
+ .base_addr = 0,
+};
+
+static int __init ptdump_init(void)
+{
+ return ptdump_register(&efi_ptdump_info, "efi_page_tables");
+}
+device_initcall(ptdump_init);
+
+#endif
+
static bool __init efi_virtmap_init(void)
{
efi_memory_desc_t *md;
@@ -114,14 +134,12 @@
pr_info("Remapping and enabling EFI services.\n");
- mapsize = efi.memmap.map_end - efi.memmap.map;
+ mapsize = efi.memmap.desc_size * efi.memmap.nr_map;
- efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB);
- if (!efi.memmap.map) {
+ if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) {
pr_err("Failed to remap EFI memory map\n");
return -ENOMEM;
}
- efi.memmap.map_end = efi.memmap.map + mapsize;
if (!efi_virtmap_init()) {
pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 30a24d0..1c33d74 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -125,16 +125,19 @@
* @entry: deleting entry
* @turn_off_scanning: Check if a scanning flag should be turned off
*/
-static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+static inline int __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
bool turn_off_scanning)
{
if (entry->deleting) {
list_del(&entry->list);
efivar_entry_iter_end();
efivar_unregister(entry);
- efivar_entry_iter_begin();
+ if (efivar_entry_iter_begin())
+ return -EINTR;
} else if (turn_off_scanning)
entry->scanning = false;
+
+ return 0;
}
/**
@@ -144,13 +147,18 @@
* @head: list head
* @stop: a flag checking if scanning will stop
*/
-static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
struct efivar_entry *next,
struct list_head *head, bool stop)
{
- __efi_pstore_scan_sysfs_exit(pos, true);
+ int ret = __efi_pstore_scan_sysfs_exit(pos, true);
+
+ if (ret)
+ return ret;
+
if (stop)
- __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+ ret = __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+ return ret;
}
/**
@@ -172,13 +180,17 @@
struct efivar_entry *entry, *n;
struct list_head *head = &efivar_sysfs_list;
int size = 0;
+ int ret;
if (!*pos) {
list_for_each_entry_safe(entry, n, head, list) {
efi_pstore_scan_sysfs_enter(entry, n, head);
size = efi_pstore_read_func(entry, data);
- efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+ ret = efi_pstore_scan_sysfs_exit(entry, n, head,
+ size < 0);
+ if (ret)
+ return ret;
if (size)
break;
}
@@ -190,7 +202,9 @@
efi_pstore_scan_sysfs_enter((*pos), n, head);
size = efi_pstore_read_func((*pos), data);
- efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ ret = efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ if (ret)
+ return ret;
if (size)
break;
}
@@ -232,7 +246,10 @@
if (!*data.buf)
return -ENOMEM;
- efivar_entry_iter_begin();
+ if (efivar_entry_iter_begin()) {
+ kfree(*data.buf);
+ return -EINTR;
+ }
size = efi_pstore_sysfs_entry_iter(&data,
(struct efivar_entry **)&psi->data);
efivar_entry_iter_end();
@@ -347,7 +364,8 @@
edata.time = time;
edata.name = efi_name;
- efivar_entry_iter_begin();
+ if (efivar_entry_iter_begin())
+ return -EINTR;
found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
if (found && !entry->scanning) {
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 7dd2e2d..1ac199c 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -27,6 +27,7 @@
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/ucs2_string.h>
+#include <linux/memblock.h>
#include <asm/early_ioremap.h>
@@ -347,56 +348,31 @@
/*
* Find the efi memory descriptor for a given physical address. Given a
- * physicall address, determine if it exists within an EFI Memory Map entry,
+ * physical address, determine if it exists within an EFI Memory Map entry,
* and if so, populate the supplied memory descriptor with the appropriate
* data.
*/
int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
{
- struct efi_memory_map *map = &efi.memmap;
- phys_addr_t p, e;
+ efi_memory_desc_t *md;
if (!efi_enabled(EFI_MEMMAP)) {
pr_err_once("EFI_MEMMAP is not enabled.\n");
return -EINVAL;
}
- if (!map) {
- pr_err_once("efi.memmap is not set.\n");
- return -EINVAL;
- }
if (!out_md) {
pr_err_once("out_md is null.\n");
return -EINVAL;
}
- if (WARN_ON_ONCE(!map->phys_map))
- return -EINVAL;
- if (WARN_ON_ONCE(map->nr_map == 0) || WARN_ON_ONCE(map->desc_size == 0))
- return -EINVAL;
- e = map->phys_map + map->nr_map * map->desc_size;
- for (p = map->phys_map; p < e; p += map->desc_size) {
- efi_memory_desc_t *md;
+ for_each_efi_memory_desc(md) {
u64 size;
u64 end;
- /*
- * If a driver calls this after efi_free_boot_services,
- * ->map will be NULL, and the target may also not be mapped.
- * So just always get our own virtual map on the CPU.
- *
- */
- md = early_memremap(p, sizeof (*md));
- if (!md) {
- pr_err_once("early_memremap(%pa, %zu) failed.\n",
- &p, sizeof (*md));
- return -ENOMEM;
- }
-
if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
md->type != EFI_BOOT_SERVICES_DATA &&
md->type != EFI_RUNTIME_SERVICES_DATA) {
- early_memunmap(md, sizeof (*md));
continue;
}
@@ -404,11 +380,8 @@
end = md->phys_addr + size;
if (phys_addr >= md->phys_addr && phys_addr < end) {
memcpy(out_md, md, sizeof(*out_md));
- early_memunmap(md, sizeof (*md));
return 0;
}
-
- early_memunmap(md, sizeof (*md));
}
pr_err_once("requested map not found.\n");
return -ENOENT;
@@ -424,6 +397,35 @@
return end;
}
+void __init __weak efi_arch_mem_reserve(phys_addr_t addr, u64 size) {}
+
+/**
+ * efi_mem_reserve - Reserve an EFI memory region
+ * @addr: Physical address to reserve
+ * @size: Size of reservation
+ *
+ * Mark a region as reserved from general kernel allocation and
+ * prevent it being released by efi_free_boot_services().
+ *
+ * This function should be called drivers once they've parsed EFI
+ * configuration tables to figure out where their data lives, e.g.
+ * efi_esrt_init().
+ */
+void __init efi_mem_reserve(phys_addr_t addr, u64 size)
+{
+ if (!memblock_is_region_reserved(addr, size))
+ memblock_reserve(addr, size);
+
+ /*
+ * Some architectures (x86) reserve all boot services ranges
+ * until efi_free_boot_services() because of buggy firmware
+ * implementations. This means the above memblock_reserve() is
+ * superfluous on x86 and instead what it needs to do is
+ * ensure the @start, @size is not freed.
+ */
+ efi_arch_mem_reserve(addr, size);
+}
+
static __initdata efi_config_table_type_t common_tables[] = {
{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
@@ -811,6 +813,9 @@
case EFI_NOT_FOUND:
err = -ENOENT;
break;
+ case EFI_ABORTED:
+ err = -EINTR;
+ break;
default:
err = -EINVAL;
}
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 116b244..3e626fd 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -510,7 +510,8 @@
vendor = del_var->VendorGuid;
}
- efivar_entry_iter_begin();
+ if (efivar_entry_iter_begin())
+ return -EINTR;
entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
if (!entry)
err = -EINVAL;
@@ -575,7 +576,10 @@
return ret;
kobject_uevent(&new_var->kobj, KOBJ_ADD);
- efivar_entry_add(new_var, &efivar_sysfs_list);
+ if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
+ efivar_unregister(new_var);
+ return -EINTR;
+ }
return 0;
}
@@ -690,7 +694,10 @@
static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
{
- efivar_entry_remove(entry);
+ int err = efivar_entry_remove(entry);
+
+ if (err)
+ return err;
efivar_unregister(entry);
return 0;
}
@@ -698,7 +705,14 @@
static void efivars_sysfs_exit(void)
{
/* Remove all entries and destroy */
- __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list, NULL, NULL);
+ int err;
+
+ err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list,
+ NULL, NULL);
+ if (err) {
+ pr_err("efivars: Failed to destroy sysfs entries\n");
+ return;
+ }
if (efivars_new_var)
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 75feb3f..1491407 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/list.h>
@@ -235,7 +236,7 @@
};
/*
- * remap the table, copy it to kmalloced pages, and unmap it.
+ * remap the table, validate it, mark it reserved and unmap it.
*/
void __init efi_esrt_init(void)
{
@@ -335,7 +336,7 @@
end = esrt_data + size;
pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end);
- memblock_reserve(esrt_data, esrt_data_size);
+ efi_mem_reserve(esrt_data, esrt_data_size);
pr_debug("esrt-init: loaded.\n");
err_memunmap:
@@ -382,28 +383,18 @@
static int __init esrt_sysfs_init(void)
{
int error;
- struct efi_system_resource_table __iomem *ioesrt;
pr_debug("esrt-sysfs: loading.\n");
if (!esrt_data || !esrt_data_size)
return -ENOSYS;
- ioesrt = ioremap(esrt_data, esrt_data_size);
- if (!ioesrt) {
- pr_err("ioremap(%pa, %zu) failed.\n", &esrt_data,
+ esrt = memremap(esrt_data, esrt_data_size, MEMREMAP_WB);
+ if (!esrt) {
+ pr_err("memremap(%pa, %zu) failed.\n", &esrt_data,
esrt_data_size);
return -ENOMEM;
}
- esrt = kmalloc(esrt_data_size, GFP_KERNEL);
- if (!esrt) {
- pr_err("kmalloc failed. (wanted %zu bytes)\n", esrt_data_size);
- iounmap(ioesrt);
- return -ENOMEM;
- }
-
- memcpy_fromio(esrt, ioesrt, esrt_data_size);
-
esrt_kobj = kobject_create_and_add("esrt", efi_kobj);
if (!esrt_kobj) {
pr_err("Firmware table registration failed.\n");
@@ -429,8 +420,6 @@
if (error)
goto err_cleanup_list;
- memblock_remove(esrt_data, esrt_data_size);
-
pr_debug("esrt-sysfs: loaded.\n");
return 0;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 48430ab..520a40e 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -35,17 +35,13 @@
#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
-struct fake_mem {
- struct range range;
- u64 attribute;
-};
-static struct fake_mem fake_mems[EFI_MAX_FAKEMEM];
+static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM];
static int nr_fake_mem;
static int __init cmp_fake_mem(const void *x1, const void *x2)
{
- const struct fake_mem *m1 = x1;
- const struct fake_mem *m2 = x2;
+ const struct efi_mem_range *m1 = x1;
+ const struct efi_mem_range *m2 = x2;
if (m1->range.start < m2->range.start)
return -1;
@@ -56,40 +52,21 @@
void __init efi_fake_memmap(void)
{
- u64 start, end, m_start, m_end, m_attr;
int new_nr_map = efi.memmap.nr_map;
efi_memory_desc_t *md;
phys_addr_t new_memmap_phy;
void *new_memmap;
- void *old, *new;
int i;
- if (!nr_fake_mem || !efi_enabled(EFI_MEMMAP))
+ if (!nr_fake_mem)
return;
/* count up the number of EFI memory descriptor */
- for_each_efi_memory_desc(md) {
- start = md->phys_addr;
- end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+ for (i = 0; i < nr_fake_mem; i++) {
+ for_each_efi_memory_desc(md) {
+ struct range *r = &fake_mems[i].range;
- for (i = 0; i < nr_fake_mem; i++) {
- /* modifying range */
- m_start = fake_mems[i].range.start;
- m_end = fake_mems[i].range.end;
-
- if (m_start <= start) {
- /* split into 2 parts */
- if (start < m_end && m_end < end)
- new_nr_map++;
- }
- if (start < m_start && m_start < end) {
- /* split into 3 parts */
- if (m_end < end)
- new_nr_map += 2;
- /* split into 2 parts */
- if (end <= m_end)
- new_nr_map++;
- }
+ new_nr_map += efi_memmap_split_count(md, r);
}
}
@@ -107,85 +84,13 @@
return;
}
- for (old = efi.memmap.map, new = new_memmap;
- old < efi.memmap.map_end;
- old += efi.memmap.desc_size, new += efi.memmap.desc_size) {
-
- /* copy original EFI memory descriptor */
- memcpy(new, old, efi.memmap.desc_size);
- md = new;
- start = md->phys_addr;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
-
- for (i = 0; i < nr_fake_mem; i++) {
- /* modifying range */
- m_start = fake_mems[i].range.start;
- m_end = fake_mems[i].range.end;
- m_attr = fake_mems[i].attribute;
-
- if (m_start <= start && end <= m_end)
- md->attribute |= m_attr;
-
- if (m_start <= start &&
- (start < m_end && m_end < end)) {
- /* first part */
- md->attribute |= m_attr;
- md->num_pages = (m_end - md->phys_addr + 1) >>
- EFI_PAGE_SHIFT;
- /* latter part */
- new += efi.memmap.desc_size;
- memcpy(new, old, efi.memmap.desc_size);
- md = new;
- md->phys_addr = m_end + 1;
- md->num_pages = (end - md->phys_addr + 1) >>
- EFI_PAGE_SHIFT;
- }
-
- if ((start < m_start && m_start < end) && m_end < end) {
- /* first part */
- md->num_pages = (m_start - md->phys_addr) >>
- EFI_PAGE_SHIFT;
- /* middle part */
- new += efi.memmap.desc_size;
- memcpy(new, old, efi.memmap.desc_size);
- md = new;
- md->attribute |= m_attr;
- md->phys_addr = m_start;
- md->num_pages = (m_end - m_start + 1) >>
- EFI_PAGE_SHIFT;
- /* last part */
- new += efi.memmap.desc_size;
- memcpy(new, old, efi.memmap.desc_size);
- md = new;
- md->phys_addr = m_end + 1;
- md->num_pages = (end - m_end) >>
- EFI_PAGE_SHIFT;
- }
-
- if ((start < m_start && m_start < end) &&
- (end <= m_end)) {
- /* first part */
- md->num_pages = (m_start - md->phys_addr) >>
- EFI_PAGE_SHIFT;
- /* latter part */
- new += efi.memmap.desc_size;
- memcpy(new, old, efi.memmap.desc_size);
- md = new;
- md->phys_addr = m_start;
- md->num_pages = (end - md->phys_addr + 1) >>
- EFI_PAGE_SHIFT;
- md->attribute |= m_attr;
- }
- }
- }
+ for (i = 0; i < nr_fake_mem; i++)
+ efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]);
/* swap into new EFI memmap */
- efi_unmap_memmap();
- efi.memmap.map = new_memmap;
- efi.memmap.phys_map = new_memmap_phy;
- efi.memmap.nr_map = new_nr_map;
- efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size;
- set_bit(EFI_MEMMAP, &efi.flags);
+ early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map);
+
+ efi_memmap_install(new_memmap_phy, new_nr_map);
/* print new EFI memmap */
efi_print_memmap();
@@ -223,7 +128,7 @@
p++;
}
- sort(fake_mems, nr_fake_mem, sizeof(struct fake_mem),
+ sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
cmp_fake_mem, NULL);
for (i = 0; i < nr_fake_mem; i++)
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
new file mode 100644
index 0000000..f03ddec
--- /dev/null
+++ b/drivers/firmware/efi/memmap.c
@@ -0,0 +1,303 @@
+/*
+ * Common EFI memory map functions.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <asm/early_ioremap.h>
+
+/**
+ * __efi_memmap_init - Common code for mapping the EFI memory map
+ * @data: EFI memory map data
+ * @late: Use early or late mapping function?
+ *
+ * This function takes care of figuring out which function to use to
+ * map the EFI memory map in efi.memmap based on how far into the boot
+ * we are.
+ *
+ * During bootup @late should be %false since we only have access to
+ * the early_memremap*() functions as the vmalloc space isn't setup.
+ * Once the kernel is fully booted we can fallback to the more robust
+ * memremap*() API.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+static int __init
+__efi_memmap_init(struct efi_memory_map_data *data, bool late)
+{
+ struct efi_memory_map map;
+ phys_addr_t phys_map;
+
+ if (efi_enabled(EFI_PARAVIRT))
+ return 0;
+
+ phys_map = data->phys_map;
+
+ if (late)
+ map.map = memremap(phys_map, data->size, MEMREMAP_WB);
+ else
+ map.map = early_memremap(phys_map, data->size);
+
+ if (!map.map) {
+ pr_err("Could not map the memory map!\n");
+ return -ENOMEM;
+ }
+
+ map.phys_map = data->phys_map;
+ map.nr_map = data->size / data->desc_size;
+ map.map_end = map.map + data->size;
+
+ map.desc_version = data->desc_version;
+ map.desc_size = data->desc_size;
+ map.late = late;
+
+ set_bit(EFI_MEMMAP, &efi.flags);
+
+ efi.memmap = map;
+
+ return 0;
+}
+
+/**
+ * efi_memmap_init_early - Map the EFI memory map data structure
+ * @data: EFI memory map data
+ *
+ * Use early_memremap() to map the passed in EFI memory map and assign
+ * it to efi.memmap.
+ */
+int __init efi_memmap_init_early(struct efi_memory_map_data *data)
+{
+ /* Cannot go backwards */
+ WARN_ON(efi.memmap.late);
+
+ return __efi_memmap_init(data, false);
+}
+
+void __init efi_memmap_unmap(void)
+{
+ if (!efi.memmap.late) {
+ unsigned long size;
+
+ size = efi.memmap.desc_size * efi.memmap.nr_map;
+ early_memunmap(efi.memmap.map, size);
+ } else {
+ memunmap(efi.memmap.map);
+ }
+
+ efi.memmap.map = NULL;
+ clear_bit(EFI_MEMMAP, &efi.flags);
+}
+
+/**
+ * efi_memmap_init_late - Map efi.memmap with memremap()
+ * @phys_addr: Physical address of the new EFI memory map
+ * @size: Size in bytes of the new EFI memory map
+ *
+ * Setup a mapping of the EFI memory map using ioremap_cache(). This
+ * function should only be called once the vmalloc space has been
+ * setup and is therefore not suitable for calling during early EFI
+ * initialise, e.g. in efi_init(). Additionally, it expects
+ * efi_memmap_init_early() to have already been called.
+ *
+ * The reason there are two EFI memmap initialisation
+ * (efi_memmap_init_early() and this late version) is because the
+ * early EFI memmap should be explicitly unmapped once EFI
+ * initialisation is complete as the fixmap space used to map the EFI
+ * memmap (via early_memremap()) is a scarce resource.
+ *
+ * This late mapping is intended to persist for the duration of
+ * runtime so that things like efi_mem_desc_lookup() and
+ * efi_mem_attributes() always work.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size)
+{
+ struct efi_memory_map_data data = {
+ .phys_map = addr,
+ .size = size,
+ };
+
+ /* Did we forget to unmap the early EFI memmap? */
+ WARN_ON(efi.memmap.map);
+
+ /* Were we already called? */
+ WARN_ON(efi.memmap.late);
+
+ /*
+ * It makes no sense to allow callers to register different
+ * values for the following fields. Copy them out of the
+ * existing early EFI memmap.
+ */
+ data.desc_version = efi.memmap.desc_version;
+ data.desc_size = efi.memmap.desc_size;
+
+ return __efi_memmap_init(&data, true);
+}
+
+/**
+ * efi_memmap_install - Install a new EFI memory map in efi.memmap
+ * @addr: Physical address of the memory map
+ * @nr_map: Number of entries in the memory map
+ *
+ * Unlike efi_memmap_init_*(), this function does not allow the caller
+ * to switch from early to late mappings. It simply uses the existing
+ * mapping function and installs the new memmap.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map)
+{
+ struct efi_memory_map_data data;
+
+ efi_memmap_unmap();
+
+ data.phys_map = addr;
+ data.size = efi.memmap.desc_size * nr_map;
+ data.desc_version = efi.memmap.desc_version;
+ data.desc_size = efi.memmap.desc_size;
+
+ return __efi_memmap_init(&data, efi.memmap.late);
+}
+
+/**
+ * efi_memmap_split_count - Count number of additional EFI memmap entries
+ * @md: EFI memory descriptor to split
+ * @range: Address range (start, end) to split around
+ *
+ * Returns the number of additional EFI memmap entries required to
+ * accomodate @range.
+ */
+int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
+{
+ u64 m_start, m_end;
+ u64 start, end;
+ int count = 0;
+
+ start = md->phys_addr;
+ end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+ /* modifying range */
+ m_start = range->start;
+ m_end = range->end;
+
+ if (m_start <= start) {
+ /* split into 2 parts */
+ if (start < m_end && m_end < end)
+ count++;
+ }
+
+ if (start < m_start && m_start < end) {
+ /* split into 3 parts */
+ if (m_end < end)
+ count += 2;
+ /* split into 2 parts */
+ if (end <= m_end)
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * efi_memmap_insert - Insert a memory region in an EFI memmap
+ * @old_memmap: The existing EFI memory map structure
+ * @buf: Address of buffer to store new map
+ * @mem: Memory map entry to insert
+ *
+ * It is suggested that you call efi_memmap_split_count() first
+ * to see how large @buf needs to be.
+ */
+void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf,
+ struct efi_mem_range *mem)
+{
+ u64 m_start, m_end, m_attr;
+ efi_memory_desc_t *md;
+ u64 start, end;
+ void *old, *new;
+
+ /* modifying range */
+ m_start = mem->range.start;
+ m_end = mem->range.end;
+ m_attr = mem->attribute;
+
+ /*
+ * The EFI memory map deals with regions in EFI_PAGE_SIZE
+ * units. Ensure that the region described by 'mem' is aligned
+ * correctly.
+ */
+ if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) ||
+ !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) {
+ WARN_ON(1);
+ return;
+ }
+
+ for (old = old_memmap->map, new = buf;
+ old < old_memmap->map_end;
+ old += old_memmap->desc_size, new += old_memmap->desc_size) {
+
+ /* copy original EFI memory descriptor */
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ start = md->phys_addr;
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+ if (m_start <= start && end <= m_end)
+ md->attribute |= m_attr;
+
+ if (m_start <= start &&
+ (start < m_end && m_end < end)) {
+ /* first part */
+ md->attribute |= m_attr;
+ md->num_pages = (m_end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ /* latter part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_end + 1;
+ md->num_pages = (end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ }
+
+ if ((start < m_start && m_start < end) && m_end < end) {
+ /* first part */
+ md->num_pages = (m_start - md->phys_addr) >>
+ EFI_PAGE_SHIFT;
+ /* middle part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->attribute |= m_attr;
+ md->phys_addr = m_start;
+ md->num_pages = (m_end - m_start + 1) >>
+ EFI_PAGE_SHIFT;
+ /* last part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_end + 1;
+ md->num_pages = (end - m_end) >>
+ EFI_PAGE_SHIFT;
+ }
+
+ if ((start < m_start && m_start < end) &&
+ (end <= m_end)) {
+ /* first part */
+ md->num_pages = (m_start - md->phys_addr) >>
+ EFI_PAGE_SHIFT;
+ /* latter part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_start;
+ md->num_pages = (end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ md->attribute |= m_attr;
+ }
+ }
+}
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 5c55227..8e64b77 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -14,10 +14,6 @@
#include <asm/setup.h>
-static void *efi_runtime_map;
-static int nr_efi_runtime_map;
-static u32 efi_memdesc_size;
-
struct efi_runtime_map_entry {
efi_memory_desc_t md;
struct kobject kobj; /* kobject for each entry */
@@ -106,7 +102,8 @@
static struct kset *map_kset;
static struct efi_runtime_map_entry *
-add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
+add_sysfs_runtime_map_entry(struct kobject *kobj, int nr,
+ efi_memory_desc_t *md)
{
int ret;
struct efi_runtime_map_entry *entry;
@@ -124,8 +121,7 @@
return ERR_PTR(-ENOMEM);
}
- memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
- sizeof(efi_memory_desc_t));
+ memcpy(&entry->md, md, sizeof(efi_memory_desc_t));
kobject_init(&entry->kobj, &map_ktype);
entry->kobj.kset = map_kset;
@@ -142,12 +138,12 @@
int efi_get_runtime_map_size(void)
{
- return nr_efi_runtime_map * efi_memdesc_size;
+ return efi.memmap.nr_map * efi.memmap.desc_size;
}
int efi_get_runtime_map_desc_size(void)
{
- return efi_memdesc_size;
+ return efi.memmap.desc_size;
}
int efi_runtime_map_copy(void *buf, size_t bufsz)
@@ -157,38 +153,33 @@
if (sz > bufsz)
sz = bufsz;
- memcpy(buf, efi_runtime_map, sz);
+ memcpy(buf, efi.memmap.map, sz);
return 0;
}
-void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
-{
- efi_runtime_map = map;
- nr_efi_runtime_map = nr_entries;
- efi_memdesc_size = desc_size;
-}
-
int __init efi_runtime_map_init(struct kobject *efi_kobj)
{
int i, j, ret = 0;
struct efi_runtime_map_entry *entry;
+ efi_memory_desc_t *md;
- if (!efi_runtime_map)
+ if (!efi_enabled(EFI_MEMMAP))
return 0;
- map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL);
+ map_entries = kzalloc(efi.memmap.nr_map * sizeof(entry), GFP_KERNEL);
if (!map_entries) {
ret = -ENOMEM;
goto out;
}
- for (i = 0; i < nr_efi_runtime_map; i++) {
- entry = add_sysfs_runtime_map_entry(efi_kobj, i);
+ i = 0;
+ for_each_efi_memory_desc(md) {
+ entry = add_sysfs_runtime_map_entry(efi_kobj, i, md);
if (IS_ERR(entry)) {
ret = PTR_ERR(entry);
goto out_add_entry;
}
- *(map_entries + i) = entry;
+ *(map_entries + i++) = entry;
}
return 0;
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 4195877..ae54870b 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -14,11 +14,13 @@
* This file is released under the GPLv2.
*/
+#define pr_fmt(fmt) "efi: " fmt
+
#include <linux/bug.h>
#include <linux/efi.h>
#include <linux/irqflags.h>
#include <linux/mutex.h>
-#include <linux/spinlock.h>
+#include <linux/semaphore.h>
#include <linux/stringify.h>
#include <asm/efi.h>
@@ -81,20 +83,21 @@
* +------------------------------------+-------------------------------+
*
* Due to the fact that the EFI pstore may write to the variable store in
- * interrupt context, we need to use a spinlock for at least the groups that
+ * interrupt context, we need to use a lock for at least the groups that
* contain SetVariable() and QueryVariableInfo(). That leaves little else, as
* none of the remaining functions are actually ever called at runtime.
- * So let's just use a single spinlock to serialize all Runtime Services calls.
+ * So let's just use a single lock to serialize all Runtime Services calls.
*/
-static DEFINE_SPINLOCK(efi_runtime_lock);
+static DEFINE_SEMAPHORE(efi_runtime_lock);
static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(get_time, tm, tc);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -102,9 +105,10 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(set_time, tm);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -114,9 +118,10 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -124,9 +129,10 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(set_wakeup_time, enabled, tm);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -138,10 +144,11 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(get_variable, name, vendor, attr, data_size,
data);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -151,9 +158,10 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(get_next_variable, name_size, name, vendor);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -165,10 +173,11 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(set_variable, name, vendor, attr, data_size,
data);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -179,12 +188,12 @@
{
efi_status_t status;
- if (!spin_trylock(&efi_runtime_lock))
+ if (down_trylock(&efi_runtime_lock))
return EFI_NOT_READY;
status = efi_call_virt(set_variable, name, vendor, attr, data_size,
data);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -199,10 +208,11 @@
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(query_variable_info, attr, storage_space,
remaining_space, max_variable_size);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -217,12 +227,12 @@
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- if (!spin_trylock(&efi_runtime_lock))
+ if (down_trylock(&efi_runtime_lock))
return EFI_NOT_READY;
status = efi_call_virt(query_variable_info, attr, storage_space,
remaining_space, max_variable_size);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -230,9 +240,10 @@
{
efi_status_t status;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(get_next_high_mono_count, count);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -241,9 +252,13 @@
unsigned long data_size,
efi_char16_t *data)
{
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock)) {
+ pr_warn("failed to invoke the reset_system() runtime service:\n"
+ "could not get exclusive access to the firmware\n");
+ return;
+ }
__efi_call_virt(reset_system, reset_type, status, data_size, data);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
}
static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
@@ -255,9 +270,10 @@
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(update_capsule, capsules, count, sg_list);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
@@ -271,10 +287,11 @@
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- spin_lock(&efi_runtime_lock);
+ if (down_interruptible(&efi_runtime_lock))
+ return EFI_ABORTED;
status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
reset_type);
- spin_unlock(&efi_runtime_lock);
+ up(&efi_runtime_lock);
return status;
}
diff --git a/drivers/firmware/efi/test/Makefile b/drivers/firmware/efi/test/Makefile
new file mode 100644
index 0000000..bcd4577
--- /dev/null
+++ b/drivers/firmware/efi/test/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_EFI_TEST) += efi_test.o
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
new file mode 100644
index 0000000..f61bb52
--- /dev/null
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -0,0 +1,749 @@
+/*
+ * EFI Test Driver for Runtime Services
+ *
+ * Copyright(C) 2012-2016 Canonical Ltd.
+ *
+ * This driver exports EFI runtime services interfaces into userspace, which
+ * allow to use and test UEFI runtime services provided by firmware.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "efi_test.h"
+
+MODULE_AUTHOR("Ivan Hu <ivan.hu@canonical.com>");
+MODULE_DESCRIPTION("EFI Test Driver");
+MODULE_LICENSE("GPL");
+
+/*
+ * Count the bytes in 'str', including the terminating NULL.
+ *
+ * Note this function returns the number of *bytes*, not the number of
+ * ucs2 characters.
+ */
+static inline size_t user_ucs2_strsize(efi_char16_t __user *str)
+{
+ efi_char16_t *s = str, c;
+ size_t len;
+
+ if (!str)
+ return 0;
+
+ /* Include terminating NULL */
+ len = sizeof(efi_char16_t);
+
+ if (get_user(c, s++)) {
+ /* Can't read userspace memory for size */
+ return 0;
+ }
+
+ while (c != 0) {
+ if (get_user(c, s++)) {
+ /* Can't read userspace memory for size */
+ return 0;
+ }
+ len += sizeof(efi_char16_t);
+ }
+ return len;
+}
+
+/*
+ * Allocate a buffer and copy a ucs2 string from user space into it.
+ */
+static inline int
+copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
+ size_t len)
+{
+ efi_char16_t *buf;
+
+ if (!src) {
+ *dst = NULL;
+ return 0;
+ }
+
+ if (!access_ok(VERIFY_READ, src, 1))
+ return -EFAULT;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf) {
+ *dst = NULL;
+ return -ENOMEM;
+ }
+ *dst = buf;
+
+ if (copy_from_user(*dst, src, len)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Count the bytes in 'str', including the terminating NULL.
+ *
+ * Just a wrap for user_ucs2_strsize
+ */
+static inline int
+get_ucs2_strsize_from_user(efi_char16_t __user *src, size_t *len)
+{
+ if (!access_ok(VERIFY_READ, src, 1))
+ return -EFAULT;
+
+ *len = user_ucs2_strsize(src);
+ if (*len == 0)
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Calculate the required buffer allocation size and copy a ucs2 string
+ * from user space into it.
+ *
+ * This function differs from copy_ucs2_from_user_len() because it
+ * calculates the size of the buffer to allocate by taking the length of
+ * the string 'src'.
+ *
+ * If a non-zero value is returned, the caller MUST NOT access 'dst'.
+ *
+ * It is the caller's responsibility to free 'dst'.
+ */
+static inline int
+copy_ucs2_from_user(efi_char16_t **dst, efi_char16_t __user *src)
+{
+ size_t len;
+
+ if (!access_ok(VERIFY_READ, src, 1))
+ return -EFAULT;
+
+ len = user_ucs2_strsize(src);
+ if (len == 0)
+ return -EFAULT;
+ return copy_ucs2_from_user_len(dst, src, len);
+}
+
+/*
+ * Copy a ucs2 string to a user buffer.
+ *
+ * This function is a simple wrapper around copy_to_user() that does
+ * nothing if 'src' is NULL, which is useful for reducing the amount of
+ * NULL checking the caller has to do.
+ *
+ * 'len' specifies the number of bytes to copy.
+ */
+static inline int
+copy_ucs2_to_user_len(efi_char16_t __user *dst, efi_char16_t *src, size_t len)
+{
+ if (!src)
+ return 0;
+
+ if (!access_ok(VERIFY_WRITE, dst, 1))
+ return -EFAULT;
+
+ return copy_to_user(dst, src, len);
+}
+
+static long efi_runtime_get_variable(unsigned long arg)
+{
+ struct efi_getvariable __user *getvariable_user;
+ struct efi_getvariable getvariable;
+ unsigned long datasize, prev_datasize, *dz;
+ efi_guid_t vendor_guid, *vd = NULL;
+ efi_status_t status;
+ efi_char16_t *name = NULL;
+ u32 attr, *at;
+ void *data = NULL;
+ int rv = 0;
+
+ getvariable_user = (struct efi_getvariable __user *)arg;
+
+ if (copy_from_user(&getvariable, getvariable_user,
+ sizeof(getvariable)))
+ return -EFAULT;
+ if (getvariable.data_size &&
+ get_user(datasize, getvariable.data_size))
+ return -EFAULT;
+ if (getvariable.vendor_guid) {
+ if (copy_from_user(&vendor_guid, getvariable.vendor_guid,
+ sizeof(vendor_guid)))
+ return -EFAULT;
+ vd = &vendor_guid;
+ }
+
+ if (getvariable.variable_name) {
+ rv = copy_ucs2_from_user(&name, getvariable.variable_name);
+ if (rv)
+ return rv;
+ }
+
+ at = getvariable.attributes ? &attr : NULL;
+ dz = getvariable.data_size ? &datasize : NULL;
+
+ if (getvariable.data_size && getvariable.data) {
+ data = kmalloc(datasize, GFP_KERNEL);
+ if (!data) {
+ kfree(name);
+ return -ENOMEM;
+ }
+ }
+
+ prev_datasize = datasize;
+ status = efi.get_variable(name, vd, at, dz, data);
+ kfree(name);
+
+ if (put_user(status, getvariable.status)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ if (status != EFI_SUCCESS) {
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ if (dz && put_user(datasize, getvariable.data_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+ rv = -EINVAL;
+ goto out;
+ }
+
+ if (prev_datasize < datasize) {
+ rv = -EINVAL;
+ goto out;
+ }
+
+ if (data) {
+ if (copy_to_user(getvariable.data, data, datasize)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+
+ if (at && put_user(attr, getvariable.attributes)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ if (dz && put_user(datasize, getvariable.data_size))
+ rv = -EFAULT;
+
+out:
+ kfree(data);
+ return rv;
+
+}
+
+static long efi_runtime_set_variable(unsigned long arg)
+{
+ struct efi_setvariable __user *setvariable_user;
+ struct efi_setvariable setvariable;
+ efi_guid_t vendor_guid;
+ efi_status_t status;
+ efi_char16_t *name = NULL;
+ void *data;
+ int rv = 0;
+
+ setvariable_user = (struct efi_setvariable __user *)arg;
+
+ if (copy_from_user(&setvariable, setvariable_user, sizeof(setvariable)))
+ return -EFAULT;
+ if (copy_from_user(&vendor_guid, setvariable.vendor_guid,
+ sizeof(vendor_guid)))
+ return -EFAULT;
+
+ if (setvariable.variable_name) {
+ rv = copy_ucs2_from_user(&name, setvariable.variable_name);
+ if (rv)
+ return rv;
+ }
+
+ data = kmalloc(setvariable.data_size, GFP_KERNEL);
+ if (!data) {
+ kfree(name);
+ return -ENOMEM;
+ }
+ if (copy_from_user(data, setvariable.data, setvariable.data_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ status = efi.set_variable(name, &vendor_guid,
+ setvariable.attributes,
+ setvariable.data_size, data);
+
+ if (put_user(status, setvariable.status)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ rv = status == EFI_SUCCESS ? 0 : -EINVAL;
+
+out:
+ kfree(data);
+ kfree(name);
+
+ return rv;
+}
+
+static long efi_runtime_get_time(unsigned long arg)
+{
+ struct efi_gettime __user *gettime_user;
+ struct efi_gettime gettime;
+ efi_status_t status;
+ efi_time_cap_t cap;
+ efi_time_t efi_time;
+
+ gettime_user = (struct efi_gettime __user *)arg;
+ if (copy_from_user(&gettime, gettime_user, sizeof(gettime)))
+ return -EFAULT;
+
+ status = efi.get_time(gettime.time ? &efi_time : NULL,
+ gettime.capabilities ? &cap : NULL);
+
+ if (put_user(status, gettime.status))
+ return -EFAULT;
+
+ if (status != EFI_SUCCESS)
+ return -EINVAL;
+
+ if (gettime.capabilities) {
+ efi_time_cap_t __user *cap_local;
+
+ cap_local = (efi_time_cap_t *)gettime.capabilities;
+ if (put_user(cap.resolution, &(cap_local->resolution)) ||
+ put_user(cap.accuracy, &(cap_local->accuracy)) ||
+ put_user(cap.sets_to_zero, &(cap_local->sets_to_zero)))
+ return -EFAULT;
+ }
+ if (gettime.time) {
+ if (copy_to_user(gettime.time, &efi_time, sizeof(efi_time_t)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static long efi_runtime_set_time(unsigned long arg)
+{
+ struct efi_settime __user *settime_user;
+ struct efi_settime settime;
+ efi_status_t status;
+ efi_time_t efi_time;
+
+ settime_user = (struct efi_settime __user *)arg;
+ if (copy_from_user(&settime, settime_user, sizeof(settime)))
+ return -EFAULT;
+ if (copy_from_user(&efi_time, settime.time,
+ sizeof(efi_time_t)))
+ return -EFAULT;
+ status = efi.set_time(&efi_time);
+
+ if (put_user(status, settime.status))
+ return -EFAULT;
+
+ return status == EFI_SUCCESS ? 0 : -EINVAL;
+}
+
+static long efi_runtime_get_waketime(unsigned long arg)
+{
+ struct efi_getwakeuptime __user *getwakeuptime_user;
+ struct efi_getwakeuptime getwakeuptime;
+ efi_bool_t enabled, pending;
+ efi_status_t status;
+ efi_time_t efi_time;
+
+ getwakeuptime_user = (struct efi_getwakeuptime __user *)arg;
+ if (copy_from_user(&getwakeuptime, getwakeuptime_user,
+ sizeof(getwakeuptime)))
+ return -EFAULT;
+
+ status = efi.get_wakeup_time(
+ getwakeuptime.enabled ? (efi_bool_t *)&enabled : NULL,
+ getwakeuptime.pending ? (efi_bool_t *)&pending : NULL,
+ getwakeuptime.time ? &efi_time : NULL);
+
+ if (put_user(status, getwakeuptime.status))
+ return -EFAULT;
+
+ if (status != EFI_SUCCESS)
+ return -EINVAL;
+
+ if (getwakeuptime.enabled && put_user(enabled,
+ getwakeuptime.enabled))
+ return -EFAULT;
+
+ if (getwakeuptime.time) {
+ if (copy_to_user(getwakeuptime.time, &efi_time,
+ sizeof(efi_time_t)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static long efi_runtime_set_waketime(unsigned long arg)
+{
+ struct efi_setwakeuptime __user *setwakeuptime_user;
+ struct efi_setwakeuptime setwakeuptime;
+ efi_bool_t enabled;
+ efi_status_t status;
+ efi_time_t efi_time;
+
+ setwakeuptime_user = (struct efi_setwakeuptime __user *)arg;
+
+ if (copy_from_user(&setwakeuptime, setwakeuptime_user,
+ sizeof(setwakeuptime)))
+ return -EFAULT;
+
+ enabled = setwakeuptime.enabled;
+ if (setwakeuptime.time) {
+ if (copy_from_user(&efi_time, setwakeuptime.time,
+ sizeof(efi_time_t)))
+ return -EFAULT;
+
+ status = efi.set_wakeup_time(enabled, &efi_time);
+ } else
+ status = efi.set_wakeup_time(enabled, NULL);
+
+ if (put_user(status, setwakeuptime.status))
+ return -EFAULT;
+
+ return status == EFI_SUCCESS ? 0 : -EINVAL;
+}
+
+static long efi_runtime_get_nextvariablename(unsigned long arg)
+{
+ struct efi_getnextvariablename __user *getnextvariablename_user;
+ struct efi_getnextvariablename getnextvariablename;
+ unsigned long name_size, prev_name_size = 0, *ns = NULL;
+ efi_status_t status;
+ efi_guid_t *vd = NULL;
+ efi_guid_t vendor_guid;
+ efi_char16_t *name = NULL;
+ int rv;
+
+ getnextvariablename_user = (struct efi_getnextvariablename __user *)arg;
+
+ if (copy_from_user(&getnextvariablename, getnextvariablename_user,
+ sizeof(getnextvariablename)))
+ return -EFAULT;
+
+ if (getnextvariablename.variable_name_size) {
+ if (get_user(name_size, getnextvariablename.variable_name_size))
+ return -EFAULT;
+ ns = &name_size;
+ prev_name_size = name_size;
+ }
+
+ if (getnextvariablename.vendor_guid) {
+ if (copy_from_user(&vendor_guid,
+ getnextvariablename.vendor_guid,
+ sizeof(vendor_guid)))
+ return -EFAULT;
+ vd = &vendor_guid;
+ }
+
+ if (getnextvariablename.variable_name) {
+ size_t name_string_size = 0;
+
+ rv = get_ucs2_strsize_from_user(
+ getnextvariablename.variable_name,
+ &name_string_size);
+ if (rv)
+ return rv;
+ /*
+ * The name_size may be smaller than the real buffer size where
+ * variable name located in some use cases. The most typical
+ * case is passing a 0 to get the required buffer size for the
+ * 1st time call. So we need to copy the content from user
+ * space for at least the string size of variable name, or else
+ * the name passed to UEFI may not be terminated as we expected.
+ */
+ rv = copy_ucs2_from_user_len(&name,
+ getnextvariablename.variable_name,
+ prev_name_size > name_string_size ?
+ prev_name_size : name_string_size);
+ if (rv)
+ return rv;
+ }
+
+ status = efi.get_next_variable(ns, name, vd);
+
+ if (put_user(status, getnextvariablename.status)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ if (status != EFI_SUCCESS) {
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ if (ns && put_user(*ns,
+ getnextvariablename.variable_name_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+ rv = -EINVAL;
+ goto out;
+ }
+
+ if (name) {
+ if (copy_ucs2_to_user_len(getnextvariablename.variable_name,
+ name, prev_name_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+
+ if (ns) {
+ if (put_user(*ns, getnextvariablename.variable_name_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+
+ if (vd) {
+ if (copy_to_user(getnextvariablename.vendor_guid, vd,
+ sizeof(efi_guid_t)))
+ rv = -EFAULT;
+ }
+
+out:
+ kfree(name);
+ return rv;
+}
+
+static long efi_runtime_get_nexthighmonocount(unsigned long arg)
+{
+ struct efi_getnexthighmonotoniccount __user *getnexthighmonocount_user;
+ struct efi_getnexthighmonotoniccount getnexthighmonocount;
+ efi_status_t status;
+ u32 count;
+
+ getnexthighmonocount_user = (struct
+ efi_getnexthighmonotoniccount __user *)arg;
+
+ if (copy_from_user(&getnexthighmonocount,
+ getnexthighmonocount_user,
+ sizeof(getnexthighmonocount)))
+ return -EFAULT;
+
+ status = efi.get_next_high_mono_count(
+ getnexthighmonocount.high_count ? &count : NULL);
+
+ if (put_user(status, getnexthighmonocount.status))
+ return -EFAULT;
+
+ if (status != EFI_SUCCESS)
+ return -EINVAL;
+
+ if (getnexthighmonocount.high_count &&
+ put_user(count, getnexthighmonocount.high_count))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long efi_runtime_query_variableinfo(unsigned long arg)
+{
+ struct efi_queryvariableinfo __user *queryvariableinfo_user;
+ struct efi_queryvariableinfo queryvariableinfo;
+ efi_status_t status;
+ u64 max_storage, remaining, max_size;
+
+ queryvariableinfo_user = (struct efi_queryvariableinfo __user *)arg;
+
+ if (copy_from_user(&queryvariableinfo, queryvariableinfo_user,
+ sizeof(queryvariableinfo)))
+ return -EFAULT;
+
+ status = efi.query_variable_info(queryvariableinfo.attributes,
+ &max_storage, &remaining, &max_size);
+
+ if (put_user(status, queryvariableinfo.status))
+ return -EFAULT;
+
+ if (status != EFI_SUCCESS)
+ return -EINVAL;
+
+ if (put_user(max_storage,
+ queryvariableinfo.maximum_variable_storage_size))
+ return -EFAULT;
+
+ if (put_user(remaining,
+ queryvariableinfo.remaining_variable_storage_size))
+ return -EFAULT;
+
+ if (put_user(max_size, queryvariableinfo.maximum_variable_size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long efi_runtime_query_capsulecaps(unsigned long arg)
+{
+ struct efi_querycapsulecapabilities __user *qcaps_user;
+ struct efi_querycapsulecapabilities qcaps;
+ efi_capsule_header_t *capsules;
+ efi_status_t status;
+ u64 max_size;
+ int i, reset_type;
+ int rv = 0;
+
+ qcaps_user = (struct efi_querycapsulecapabilities __user *)arg;
+
+ if (copy_from_user(&qcaps, qcaps_user, sizeof(qcaps)))
+ return -EFAULT;
+
+ capsules = kcalloc(qcaps.capsule_count + 1,
+ sizeof(efi_capsule_header_t), GFP_KERNEL);
+ if (!capsules)
+ return -ENOMEM;
+
+ for (i = 0; i < qcaps.capsule_count; i++) {
+ efi_capsule_header_t *c;
+ /*
+ * We cannot dereference qcaps.capsule_header_array directly to
+ * obtain the address of the capsule as it resides in the
+ * user space
+ */
+ if (get_user(c, qcaps.capsule_header_array + i)) {
+ rv = -EFAULT;
+ goto out;
+ }
+ if (copy_from_user(&capsules[i], c,
+ sizeof(efi_capsule_header_t))) {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+
+ qcaps.capsule_header_array = &capsules;
+
+ status = efi.query_capsule_caps((efi_capsule_header_t **)
+ qcaps.capsule_header_array,
+ qcaps.capsule_count,
+ &max_size, &reset_type);
+
+ if (put_user(status, qcaps.status)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ if (status != EFI_SUCCESS) {
+ rv = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(max_size, qcaps.maximum_capsule_size)) {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ if (put_user(reset_type, qcaps.reset_type))
+ rv = -EFAULT;
+
+out:
+ kfree(capsules);
+ return rv;
+}
+
+static long efi_test_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case EFI_RUNTIME_GET_VARIABLE:
+ return efi_runtime_get_variable(arg);
+
+ case EFI_RUNTIME_SET_VARIABLE:
+ return efi_runtime_set_variable(arg);
+
+ case EFI_RUNTIME_GET_TIME:
+ return efi_runtime_get_time(arg);
+
+ case EFI_RUNTIME_SET_TIME:
+ return efi_runtime_set_time(arg);
+
+ case EFI_RUNTIME_GET_WAKETIME:
+ return efi_runtime_get_waketime(arg);
+
+ case EFI_RUNTIME_SET_WAKETIME:
+ return efi_runtime_set_waketime(arg);
+
+ case EFI_RUNTIME_GET_NEXTVARIABLENAME:
+ return efi_runtime_get_nextvariablename(arg);
+
+ case EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT:
+ return efi_runtime_get_nexthighmonocount(arg);
+
+ case EFI_RUNTIME_QUERY_VARIABLEINFO:
+ return efi_runtime_query_variableinfo(arg);
+
+ case EFI_RUNTIME_QUERY_CAPSULECAPABILITIES:
+ return efi_runtime_query_capsulecaps(arg);
+ }
+
+ return -ENOTTY;
+}
+
+static int efi_test_open(struct inode *inode, struct file *file)
+{
+ /*
+ * nothing special to do here
+ * We do accept multiple open files at the same time as we
+ * synchronize on the per call operation.
+ */
+ return 0;
+}
+
+static int efi_test_close(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+/*
+ * The various file operations we support.
+ */
+static const struct file_operations efi_test_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = efi_test_ioctl,
+ .open = efi_test_open,
+ .release = efi_test_close,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice efi_test_dev = {
+ MISC_DYNAMIC_MINOR,
+ "efi_test",
+ &efi_test_fops
+};
+
+static int __init efi_test_init(void)
+{
+ int ret;
+
+ ret = misc_register(&efi_test_dev);
+ if (ret) {
+ pr_err("efi_test: can't misc_register on minor=%d\n",
+ MISC_DYNAMIC_MINOR);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit efi_test_exit(void)
+{
+ misc_deregister(&efi_test_dev);
+}
+
+module_init(efi_test_init);
+module_exit(efi_test_exit);
diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h
new file mode 100644
index 0000000..a33a6c6
--- /dev/null
+++ b/drivers/firmware/efi/test/efi_test.h
@@ -0,0 +1,110 @@
+/*
+ * EFI Test driver Header
+ *
+ * Copyright(C) 2012-2016 Canonical Ltd.
+ *
+ */
+
+#ifndef _DRIVERS_FIRMWARE_EFI_TEST_H_
+#define _DRIVERS_FIRMWARE_EFI_TEST_H_
+
+#include <linux/efi.h>
+
+struct efi_getvariable {
+ efi_char16_t *variable_name;
+ efi_guid_t *vendor_guid;
+ u32 *attributes;
+ unsigned long *data_size;
+ void *data;
+ efi_status_t *status;
+} __packed;
+
+struct efi_setvariable {
+ efi_char16_t *variable_name;
+ efi_guid_t *vendor_guid;
+ u32 attributes;
+ unsigned long data_size;
+ void *data;
+ efi_status_t *status;
+} __packed;
+
+struct efi_getnextvariablename {
+ unsigned long *variable_name_size;
+ efi_char16_t *variable_name;
+ efi_guid_t *vendor_guid;
+ efi_status_t *status;
+} __packed;
+
+struct efi_queryvariableinfo {
+ u32 attributes;
+ u64 *maximum_variable_storage_size;
+ u64 *remaining_variable_storage_size;
+ u64 *maximum_variable_size;
+ efi_status_t *status;
+} __packed;
+
+struct efi_gettime {
+ efi_time_t *time;
+ efi_time_cap_t *capabilities;
+ efi_status_t *status;
+} __packed;
+
+struct efi_settime {
+ efi_time_t *time;
+ efi_status_t *status;
+} __packed;
+
+struct efi_getwakeuptime {
+ efi_bool_t *enabled;
+ efi_bool_t *pending;
+ efi_time_t *time;
+ efi_status_t *status;
+} __packed;
+
+struct efi_setwakeuptime {
+ efi_bool_t enabled;
+ efi_time_t *time;
+ efi_status_t *status;
+} __packed;
+
+struct efi_getnexthighmonotoniccount {
+ u32 *high_count;
+ efi_status_t *status;
+} __packed;
+
+struct efi_querycapsulecapabilities {
+ efi_capsule_header_t **capsule_header_array;
+ unsigned long capsule_count;
+ u64 *maximum_capsule_size;
+ int *reset_type;
+ efi_status_t *status;
+} __packed;
+
+#define EFI_RUNTIME_GET_VARIABLE \
+ _IOWR('p', 0x01, struct efi_getvariable)
+#define EFI_RUNTIME_SET_VARIABLE \
+ _IOW('p', 0x02, struct efi_setvariable)
+
+#define EFI_RUNTIME_GET_TIME \
+ _IOR('p', 0x03, struct efi_gettime)
+#define EFI_RUNTIME_SET_TIME \
+ _IOW('p', 0x04, struct efi_settime)
+
+#define EFI_RUNTIME_GET_WAKETIME \
+ _IOR('p', 0x05, struct efi_getwakeuptime)
+#define EFI_RUNTIME_SET_WAKETIME \
+ _IOW('p', 0x06, struct efi_setwakeuptime)
+
+#define EFI_RUNTIME_GET_NEXTVARIABLENAME \
+ _IOWR('p', 0x07, struct efi_getnextvariablename)
+
+#define EFI_RUNTIME_QUERY_VARIABLEINFO \
+ _IOR('p', 0x08, struct efi_queryvariableinfo)
+
+#define EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT \
+ _IOR('p', 0x09, struct efi_getnexthighmonotoniccount)
+
+#define EFI_RUNTIME_QUERY_CAPSULECAPABILITIES \
+ _IOR('p', 0x0A, struct efi_querycapsulecapabilities)
+
+#endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index d3b7513..9336ffd 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -37,6 +37,14 @@
/* Private pointer to registered efivars */
static struct efivars *__efivars;
+/*
+ * efivars_lock protects three things:
+ * 1) efivarfs_list and efivars_sysfs_list
+ * 2) ->ops calls
+ * 3) (un)registration of __efivars
+ */
+static DEFINE_SEMAPHORE(efivars_lock);
+
static bool efivar_wq_enabled = true;
DECLARE_WORK(efivar_work, NULL);
EXPORT_SYMBOL_GPL(efivar_work);
@@ -434,7 +442,10 @@
return -ENOMEM;
}
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock)) {
+ err = -EINTR;
+ goto free;
+ }
/*
* Per EFI spec, the maximum storage allocated for both
@@ -450,7 +461,7 @@
switch (status) {
case EFI_SUCCESS:
if (duplicates)
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
variable_name_size = var_name_strnsize(variable_name,
variable_name_size);
@@ -476,8 +487,12 @@
status = EFI_NOT_FOUND;
}
- if (duplicates)
- spin_lock_irq(&__efivars->lock);
+ if (duplicates) {
+ if (down_interruptible(&efivars_lock)) {
+ err = -EINTR;
+ goto free;
+ }
+ }
break;
case EFI_NOT_FOUND:
@@ -491,8 +506,8 @@
} while (status != EFI_NOT_FOUND);
- spin_unlock_irq(&__efivars->lock);
-
+ up(&efivars_lock);
+free:
kfree(variable_name);
return err;
@@ -503,24 +518,34 @@
* efivar_entry_add - add entry to variable list
* @entry: entry to add to list
* @head: list head
+ *
+ * Returns 0 on success, or a kernel error code on failure.
*/
-void efivar_entry_add(struct efivar_entry *entry, struct list_head *head)
+int efivar_entry_add(struct efivar_entry *entry, struct list_head *head)
{
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
list_add(&entry->list, head);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(efivar_entry_add);
/**
* efivar_entry_remove - remove entry from variable list
* @entry: entry to remove from list
+ *
+ * Returns 0 on success, or a kernel error code on failure.
*/
-void efivar_entry_remove(struct efivar_entry *entry)
+int efivar_entry_remove(struct efivar_entry *entry)
{
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
list_del(&entry->list);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(efivar_entry_remove);
@@ -537,10 +562,8 @@
*/
static void efivar_entry_list_del_unlock(struct efivar_entry *entry)
{
- lockdep_assert_held(&__efivars->lock);
-
list_del(&entry->list);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
}
/**
@@ -563,8 +586,6 @@
const struct efivar_operations *ops = __efivars->ops;
efi_status_t status;
- lockdep_assert_held(&__efivars->lock);
-
status = ops->set_variable(entry->var.VariableName,
&entry->var.VendorGuid,
0, 0, NULL);
@@ -581,20 +602,22 @@
* variable list. It is the caller's responsibility to free @entry
* once we return.
*
- * Returns 0 on success, or a converted EFI status code if
- * set_variable() fails.
+ * Returns 0 on success, -EINTR if we can't grab the semaphore,
+ * converted EFI status code if set_variable() fails.
*/
int efivar_entry_delete(struct efivar_entry *entry)
{
const struct efivar_operations *ops = __efivars->ops;
efi_status_t status;
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
+
status = ops->set_variable(entry->var.VariableName,
&entry->var.VendorGuid,
0, 0, NULL);
if (!(status == EFI_SUCCESS || status == EFI_NOT_FOUND)) {
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
return efi_status_to_err(status);
}
@@ -620,9 +643,9 @@
* If @head is not NULL a lookup is performed to determine whether
* the entry is already on the list.
*
- * Returns 0 on success, -EEXIST if a lookup is performed and the entry
- * already exists on the list, or a converted EFI status code if
- * set_variable() fails.
+ * Returns 0 on success, -EINTR if we can't grab the semaphore,
+ * -EEXIST if a lookup is performed and the entry already exists on
+ * the list, or a converted EFI status code if set_variable() fails.
*/
int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
unsigned long size, void *data, struct list_head *head)
@@ -632,10 +655,10 @@
efi_char16_t *name = entry->var.VariableName;
efi_guid_t vendor = entry->var.VendorGuid;
- spin_lock_irq(&__efivars->lock);
-
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
if (head && efivar_entry_find(name, vendor, head, false)) {
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
return -EEXIST;
}
@@ -644,7 +667,7 @@
status = ops->set_variable(name, &vendor,
attributes, size, data);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
return efi_status_to_err(status);
@@ -658,30 +681,29 @@
* from crash/panic handlers.
*
* Crucially, this function will not block if it cannot acquire
- * __efivars->lock. Instead, it returns -EBUSY.
+ * efivars_lock. Instead, it returns -EBUSY.
*/
static int
efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
u32 attributes, unsigned long size, void *data)
{
const struct efivar_operations *ops = __efivars->ops;
- unsigned long flags;
efi_status_t status;
- if (!spin_trylock_irqsave(&__efivars->lock, flags))
+ if (down_trylock(&efivars_lock))
return -EBUSY;
status = check_var_size_nonblocking(attributes,
size + ucs2_strsize(name, 1024));
if (status != EFI_SUCCESS) {
- spin_unlock_irqrestore(&__efivars->lock, flags);
+ up(&efivars_lock);
return -ENOSPC;
}
status = ops->set_variable_nonblocking(name, &vendor, attributes,
size, data);
- spin_unlock_irqrestore(&__efivars->lock, flags);
+ up(&efivars_lock);
return efi_status_to_err(status);
}
@@ -706,7 +728,6 @@
bool block, unsigned long size, void *data)
{
const struct efivar_operations *ops = __efivars->ops;
- unsigned long flags;
efi_status_t status;
if (!ops->query_variable_store)
@@ -727,21 +748,22 @@
size, data);
if (!block) {
- if (!spin_trylock_irqsave(&__efivars->lock, flags))
+ if (down_trylock(&efivars_lock))
return -EBUSY;
} else {
- spin_lock_irqsave(&__efivars->lock, flags);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
}
status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
if (status != EFI_SUCCESS) {
- spin_unlock_irqrestore(&__efivars->lock, flags);
+ up(&efivars_lock);
return -ENOSPC;
}
status = ops->set_variable(name, &vendor, attributes, size, data);
- spin_unlock_irqrestore(&__efivars->lock, flags);
+ up(&efivars_lock);
return efi_status_to_err(status);
}
@@ -771,8 +793,6 @@
int strsize1, strsize2;
bool found = false;
- lockdep_assert_held(&__efivars->lock);
-
list_for_each_entry_safe(entry, n, head, list) {
strsize1 = ucs2_strsize(name, 1024);
strsize2 = ucs2_strsize(entry->var.VariableName, 1024);
@@ -814,10 +834,11 @@
*size = 0;
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
status = ops->get_variable(entry->var.VariableName,
&entry->var.VendorGuid, NULL, size, NULL);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
if (status != EFI_BUFFER_TOO_SMALL)
return efi_status_to_err(status);
@@ -843,8 +864,6 @@
const struct efivar_operations *ops = __efivars->ops;
efi_status_t status;
- lockdep_assert_held(&__efivars->lock);
-
status = ops->get_variable(entry->var.VariableName,
&entry->var.VendorGuid,
attributes, size, data);
@@ -866,11 +885,12 @@
const struct efivar_operations *ops = __efivars->ops;
efi_status_t status;
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
status = ops->get_variable(entry->var.VariableName,
&entry->var.VendorGuid,
attributes, size, data);
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
return efi_status_to_err(status);
}
@@ -917,7 +937,8 @@
* set_variable call, and removal of the variable from the efivars
* list (in the case of an authenticated delete).
*/
- spin_lock_irq(&__efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
/*
* Ensure that the available space hasn't shrunk below the safe level
@@ -957,7 +978,7 @@
if (status == EFI_NOT_FOUND)
efivar_entry_list_del_unlock(entry);
else
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
if (status && status != EFI_BUFFER_TOO_SMALL)
return efi_status_to_err(status);
@@ -965,7 +986,7 @@
return 0;
out:
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
return err;
}
@@ -978,9 +999,9 @@
* efivar_entry_iter_end() is called. This function is usually used in
* conjunction with __efivar_entry_iter() or efivar_entry_iter().
*/
-void efivar_entry_iter_begin(void)
+int efivar_entry_iter_begin(void)
{
- spin_lock_irq(&__efivars->lock);
+ return down_interruptible(&efivars_lock);
}
EXPORT_SYMBOL_GPL(efivar_entry_iter_begin);
@@ -991,7 +1012,7 @@
*/
void efivar_entry_iter_end(void)
{
- spin_unlock_irq(&__efivars->lock);
+ up(&efivars_lock);
}
EXPORT_SYMBOL_GPL(efivar_entry_iter_end);
@@ -1067,7 +1088,9 @@
{
int err = 0;
- efivar_entry_iter_begin();
+ err = efivar_entry_iter_begin();
+ if (err)
+ return err;
err = __efivar_entry_iter(func, head, data, NULL);
efivar_entry_iter_end();
@@ -1112,12 +1135,18 @@
const struct efivar_operations *ops,
struct kobject *kobject)
{
- spin_lock_init(&efivars->lock);
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
+
efivars->ops = ops;
efivars->kobject = kobject;
__efivars = efivars;
+ pr_info("Registered efivars operations\n");
+
+ up(&efivars_lock);
+
return 0;
}
EXPORT_SYMBOL_GPL(efivars_register);
@@ -1133,6 +1162,9 @@
{
int rv;
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
+
if (!__efivars) {
printk(KERN_ERR "efivars not registered\n");
rv = -EINVAL;
@@ -1144,10 +1176,12 @@
goto out;
}
+ pr_info("Unregistered efivars operations\n");
__efivars = NULL;
rv = 0;
out:
+ up(&efivars_lock);
return rv;
}
EXPORT_SYMBOL_GPL(efivars_unregister);
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index f1ab05e..c463871 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -910,8 +910,7 @@
gsmi_buf_free(gsmi_dev.param_buf);
gsmi_buf_free(gsmi_dev.data_buf);
gsmi_buf_free(gsmi_dev.name_buf);
- if (gsmi_dev.dma_pool)
- dma_pool_destroy(gsmi_dev.dma_pool);
+ dma_pool_destroy(gsmi_dev.dma_pool);
platform_device_unregister(gsmi_dev.pdev);
pr_info("gsmi: failed to load: %d\n", ret);
return ret;
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index d614102..cd84934 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -21,6 +21,7 @@
config FPGA_MGR_ZYNQ_FPGA
tristate "Xilinx Zynq FPGA"
+ depends on ARCH_ZYNQ || COMPILE_TEST
depends on HAS_DMA
help
FPGA manager driver support for Xilinx Zynq FPGAs.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index df7ab245..39c01b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1708,11 +1708,11 @@
DRM_INFO("amdgpu: finishing device.\n");
adev->shutdown = true;
+ drm_crtc_force_disable_all(adev->ddev);
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
- drm_crtc_force_disable_all(adev->ddev);
amdgpu_fbdev_fini(adev);
r = amdgpu_fini(adev);
kfree(adev->ip_block_status);
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 57676f8..a628975 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -1015,6 +1015,7 @@
return 0;
}
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
typedef struct drm_mode_fb_cmd232 {
u32 fb_id;
u32 width;
@@ -1071,6 +1072,7 @@
return 0;
}
+#endif
static drm_ioctl_compat_t *drm_compat_ioctls[] = {
[DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version,
@@ -1104,7 +1106,9 @@
[DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw,
#endif
[DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank,
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
[DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2,
+#endif
};
/**
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index e016640..40ce841 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -55,11 +55,11 @@
flags = exynos_gem->flags;
/*
- * without iommu support, not support physically non-continuous memory
- * for framebuffer.
+ * Physically non-contiguous memory type for framebuffer is not
+ * supported without IOMMU.
*/
if (IS_NONCONTIG_BUFFER(flags)) {
- DRM_ERROR("cannot use this gem memory type for fb.\n");
+ DRM_ERROR("Non-contiguous GEM memory is not supported.\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 0525c56..147ef0d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1753,32 +1753,6 @@
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int fimc_suspend(struct device *dev)
-{
- struct fimc_context *ctx = get_fimc_context(dev);
-
- DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- if (pm_runtime_suspended(dev))
- return 0;
-
- return fimc_clk_ctrl(ctx, false);
-}
-
-static int fimc_resume(struct device *dev)
-{
- struct fimc_context *ctx = get_fimc_context(dev);
-
- DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- if (!pm_runtime_suspended(dev))
- return fimc_clk_ctrl(ctx, true);
-
- return 0;
-}
-#endif
-
static int fimc_runtime_suspend(struct device *dev)
{
struct fimc_context *ctx = get_fimc_context(dev);
@@ -1799,7 +1773,8 @@
#endif
static const struct dev_pm_ops fimc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL)
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 4bf00f5..6eca8bb 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1475,8 +1475,8 @@
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int g2d_suspend(struct device *dev)
+#ifdef CONFIG_PM
+static int g2d_runtime_suspend(struct device *dev)
{
struct g2d_data *g2d = dev_get_drvdata(dev);
@@ -1490,25 +1490,6 @@
flush_work(&g2d->runqueue_work);
- return 0;
-}
-
-static int g2d_resume(struct device *dev)
-{
- struct g2d_data *g2d = dev_get_drvdata(dev);
-
- g2d->suspended = false;
- g2d_exec_runqueue(g2d);
-
- return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int g2d_runtime_suspend(struct device *dev)
-{
- struct g2d_data *g2d = dev_get_drvdata(dev);
-
clk_disable_unprepare(g2d->gate_clk);
return 0;
@@ -1523,12 +1504,16 @@
if (ret < 0)
dev_warn(dev, "failed to enable clock.\n");
+ g2d->suspended = false;
+ g2d_exec_runqueue(g2d);
+
return ret;
}
#endif
static const struct dev_pm_ops g2d_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL)
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 5d20da8..52a9d26 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1760,34 +1760,7 @@
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int gsc_suspend(struct device *dev)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
-
- DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- if (pm_runtime_suspended(dev))
- return 0;
-
- return gsc_clk_ctrl(ctx, false);
-}
-
-static int gsc_resume(struct device *dev)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
-
- DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- if (!pm_runtime_suspended(dev))
- return gsc_clk_ctrl(ctx, true);
-
- return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int gsc_runtime_suspend(struct device *dev)
+static int __maybe_unused gsc_runtime_suspend(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
@@ -1796,7 +1769,7 @@
return gsc_clk_ctrl(ctx, false);
}
-static int gsc_runtime_resume(struct device *dev)
+static int __maybe_unused gsc_runtime_resume(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
@@ -1804,10 +1777,10 @@
return gsc_clk_ctrl(ctx, true);
}
-#endif
static const struct dev_pm_ops gsc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 404367a..6591e40 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -794,29 +794,6 @@
return 0;
}
-
-#ifdef CONFIG_PM_SLEEP
-static int rotator_suspend(struct device *dev)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
-
- if (pm_runtime_suspended(dev))
- return 0;
-
- return rotator_clk_crtl(rot, false);
-}
-
-static int rotator_resume(struct device *dev)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
-
- if (!pm_runtime_suspended(dev))
- return rotator_clk_crtl(rot, true);
-
- return 0;
-}
-#endif
-
static int rotator_runtime_suspend(struct device *dev)
{
struct rot_context *rot = dev_get_drvdata(dev);
@@ -833,7 +810,8 @@
#endif
static const struct dev_pm_ops rotator_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume,
NULL)
};
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 7ea8aa7..6bc712f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -175,6 +175,7 @@
void (*fini)(struct nvkm_device *, bool suspend);
resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
+ bool cpu_coherent;
};
struct nvkm_device_quirk {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6190035..864323b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -209,7 +209,8 @@
nvbo->tile_flags = tile_flags;
nvbo->bo.bdev = &drm->ttm.bdev;
- nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+ if (!nvxx_device(&drm->device)->func->cpu_coherent)
+ nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
nvbo->page_shift = 12;
if (drm->client.vm) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index b1b6932..62ad030 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,6 +1614,7 @@
.fini = nvkm_device_pci_fini,
.resource_addr = nvkm_device_pci_resource_addr,
.resource_size = nvkm_device_pci_resource_size,
+ .cpu_coherent = !IS_ENABLED(CONFIG_ARM),
};
int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 939682f..9b638bd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -245,6 +245,7 @@
.fini = nvkm_device_tegra_fini,
.resource_addr = nvkm_device_tegra_resource_addr,
.resource_size = nvkm_device_tegra_resource_size,
+ .cpu_coherent = false,
};
int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
index edec30f..0a7b6ed 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
@@ -37,7 +37,10 @@
{
struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
+
+ mutex_lock(&chan->fifo->base.engine.subdev.mutex);
nvkm_ramht_remove(imem->ramht, cookie);
+ mutex_unlock(&chan->fifo->base.engine.subdev.mutex);
}
static int
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index e6abc09..1f78ec2 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3015,6 +3015,12 @@
if (rdev->pdev->device == 0x6811 &&
rdev->pdev->revision == 0x81)
max_mclk = 120000;
+ /* limit sclk/mclk on Jet parts for stability */
+ if (rdev->pdev->device == 0x6665 &&
+ rdev->pdev->revision == 0xc3) {
+ max_sclk = 75000;
+ max_mclk = 80000;
+ }
if (rps->vce_active) {
rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 9688bfa..611b6b9 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -122,7 +122,7 @@
return 0;
cmd = urb->transfer_buffer;
- for (i = y; i < height ; i++) {
+ for (i = y; i < y + height ; i++) {
const int line_offset = fb->base.pitches[0] * i;
const int byte_offset = line_offset + (x * bpp);
const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp);
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 99ec3ff..7f8ff39 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -779,19 +779,8 @@
.minor = UHID_MINOR,
.name = UHID_NAME,
};
+module_misc_device(uhid_misc);
-static int __init uhid_init(void)
-{
- return misc_register(&uhid_misc);
-}
-
-static void __exit uhid_exit(void)
-{
- misc_deregister(&uhid_misc);
-}
-
-module_init(uhid_init);
-module_exit(uhid_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem");
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56dd261..16f91c8 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -43,7 +43,12 @@
{
struct hv_monitor_page *monitorpage;
- if (channel->offermsg.monitor_allocated) {
+ /*
+ * For channels marked as in "low latency" mode
+ * bypass the monitor page mechanism.
+ */
+ if ((channel->offermsg.monitor_allocated) &&
+ (!channel->low_latency)) {
/* Each u32 represents 32 channels */
sync_set_bit(channel->offermsg.child_relid & 31,
(unsigned long *) vmbus_connection.send_int_page +
@@ -70,12 +75,14 @@
{
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
- void *in, *out;
unsigned long flags;
int ret, err = 0;
- unsigned long t;
struct page *page;
+ if (send_ringbuffer_size % PAGE_SIZE ||
+ recv_ringbuffer_size % PAGE_SIZE)
+ return -EINVAL;
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -95,36 +102,33 @@
recv_ringbuffer_size));
if (!page)
- out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(send_ringbuffer_size +
- recv_ringbuffer_size));
- else
- out = (void *)page_address(page);
+ page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
+ get_order(send_ringbuffer_size +
+ recv_ringbuffer_size));
- if (!out) {
+ if (!page) {
err = -ENOMEM;
- goto error0;
+ goto error_set_chnstate;
}
- in = (void *)((unsigned long)out + send_ringbuffer_size);
-
- newchannel->ringbuffer_pages = out;
+ newchannel->ringbuffer_pages = page_address(page);
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
recv_ringbuffer_size) >> PAGE_SHIFT;
- ret = hv_ringbuffer_init(
- &newchannel->outbound, out, send_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->outbound, page,
+ send_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
- ret = hv_ringbuffer_init(
- &newchannel->inbound, in, recv_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->inbound,
+ &page[send_ringbuffer_size >> PAGE_SHIFT],
+ recv_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
@@ -132,14 +136,14 @@
newchannel->ringbuffer_gpadlhandle = 0;
ret = vmbus_establish_gpadl(newchannel,
- newchannel->outbound.ring_buffer,
- send_ringbuffer_size +
- recv_ringbuffer_size,
- &newchannel->ringbuffer_gpadlhandle);
+ page_address(page),
+ send_ringbuffer_size +
+ recv_ringbuffer_size,
+ &newchannel->ringbuffer_gpadlhandle);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
/* Create and init the channel open message */
@@ -148,7 +152,7 @@
GFP_KERNEL);
if (!open_info) {
err = -ENOMEM;
- goto error_gpadl;
+ goto error_free_gpadl;
}
init_completion(&open_info->waitevent);
@@ -164,7 +168,7 @@
if (userdatalen > MAX_USER_DEFINED_BYTES) {
err = -EINVAL;
- goto error_gpadl;
+ goto error_free_gpadl;
}
if (userdatalen)
@@ -180,14 +184,10 @@
if (ret != 0) {
err = ret;
- goto error1;
+ goto error_clean_msglist;
}
- t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
- if (t == 0) {
- err = -ETIMEDOUT;
- goto error1;
- }
+ wait_for_completion(&open_info->waitevent);
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
@@ -195,25 +195,27 @@
if (open_info->response.open_result.status) {
err = -EAGAIN;
- goto error_gpadl;
+ goto error_free_gpadl;
}
newchannel->state = CHANNEL_OPENED_STATE;
kfree(open_info);
return 0;
-error1:
+error_clean_msglist:
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-error_gpadl:
+error_free_gpadl:
vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-
-error0:
- free_pages((unsigned long)out,
- get_order(send_ringbuffer_size + recv_ringbuffer_size));
kfree(open_info);
+error_free_pages:
+ hv_ringbuffer_cleanup(&newchannel->outbound);
+ hv_ringbuffer_cleanup(&newchannel->inbound);
+ __free_pages(page,
+ get_order(send_ringbuffer_size + recv_ringbuffer_size));
+error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
}
@@ -238,8 +240,7 @@
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
static int create_gpadl_header(void *kbuffer, u32 size,
- struct vmbus_channel_msginfo **msginfo,
- u32 *messagecount)
+ struct vmbus_channel_msginfo **msginfo)
{
int i;
int pagecount;
@@ -283,7 +284,6 @@
gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
pfnsum = pfncount;
pfnleft = pagecount - pfncount;
@@ -323,7 +323,6 @@
}
msgbody->msgsize = msgsize;
- (*messagecount)++;
gpadl_body =
(struct vmbus_channel_gpadl_body *)msgbody->msg;
@@ -352,6 +351,8 @@
msgheader = kzalloc(msgsize, GFP_KERNEL);
if (msgheader == NULL)
goto nomem;
+
+ INIT_LIST_HEAD(&msgheader->submsglist);
msgheader->msgsize = msgsize;
gpadl_header = (struct vmbus_channel_gpadl_header *)
@@ -366,7 +367,6 @@
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
}
return 0;
@@ -390,8 +390,7 @@
struct vmbus_channel_gpadl_header *gpadlmsg;
struct vmbus_channel_gpadl_body *gpadl_body;
struct vmbus_channel_msginfo *msginfo = NULL;
- struct vmbus_channel_msginfo *submsginfo;
- u32 msgcount;
+ struct vmbus_channel_msginfo *submsginfo, *tmp;
struct list_head *curr;
u32 next_gpadl_handle;
unsigned long flags;
@@ -400,7 +399,7 @@
next_gpadl_handle =
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
- ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
+ ret = create_gpadl_header(kbuffer, size, &msginfo);
if (ret)
return ret;
@@ -423,24 +422,21 @@
if (ret != 0)
goto cleanup;
- if (msgcount > 1) {
- list_for_each(curr, &msginfo->submsglist) {
+ list_for_each(curr, &msginfo->submsglist) {
+ submsginfo = (struct vmbus_channel_msginfo *)curr;
+ gpadl_body =
+ (struct vmbus_channel_gpadl_body *)submsginfo->msg;
- submsginfo = (struct vmbus_channel_msginfo *)curr;
- gpadl_body =
- (struct vmbus_channel_gpadl_body *)submsginfo->msg;
+ gpadl_body->header.msgtype =
+ CHANNELMSG_GPADL_BODY;
+ gpadl_body->gpadl = next_gpadl_handle;
- gpadl_body->header.msgtype =
- CHANNELMSG_GPADL_BODY;
- gpadl_body->gpadl = next_gpadl_handle;
+ ret = vmbus_post_msg(gpadl_body,
+ submsginfo->msgsize -
+ sizeof(*submsginfo));
+ if (ret != 0)
+ goto cleanup;
- ret = vmbus_post_msg(gpadl_body,
- submsginfo->msgsize -
- sizeof(*submsginfo));
- if (ret != 0)
- goto cleanup;
-
- }
}
wait_for_completion(&msginfo->waitevent);
@@ -451,6 +447,10 @@
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&msginfo->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
+ msglistentry) {
+ kfree(submsginfo);
+ }
kfree(msginfo);
return ret;
@@ -512,7 +512,6 @@
static int vmbus_close_internal(struct vmbus_channel *channel)
{
struct vmbus_channel_close_channel *msg;
- struct tasklet_struct *tasklet;
int ret;
/*
@@ -524,8 +523,7 @@
* To resolve the race, we can serialize them by disabling the
* tasklet when the latter is running here.
*/
- tasklet = hv_context.event_dpc[channel->target_cpu];
- tasklet_disable(tasklet);
+ hv_event_tasklet_disable(channel);
/*
* In case a device driver's probe() fails (e.g.,
@@ -591,7 +589,7 @@
get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
out:
- tasklet_enable(tasklet);
+ hv_event_tasklet_enable(channel);
return ret;
}
@@ -659,7 +657,7 @@
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -680,11 +678,6 @@
* mechanism which can hurt the performance otherwise.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) ||
(ret && !is_hvsock_channel(channel)))
vmbus_setevent(channel);
@@ -777,7 +770,7 @@
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -795,11 +788,6 @@
* enough condition that it should not matter.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) || (ret))
vmbus_setevent(channel);
@@ -861,7 +849,7 @@
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
@@ -926,7 +914,7 @@
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b6c1211..96a85cd 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
@@ -138,10 +139,32 @@
},
};
-static u16 hv_get_dev_type(const uuid_le *guid)
+static const struct {
+ uuid_le guid;
+} vmbus_unsupported_devs[] = {
+ { HV_AVMA1_GUID },
+ { HV_AVMA2_GUID },
+ { HV_RDV_GUID },
+};
+
+static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
+ if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+ return true;
+ return false;
+}
+
+static u16 hv_get_dev_type(const struct vmbus_channel *channel)
+{
+ const uuid_le *guid = &channel->offermsg.offer.if_type;
u16 i;
+ if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
+ return HV_UNKOWN;
+
for (i = HV_IDE; i < HV_UNKOWN; i++) {
if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
return i;
@@ -251,14 +274,12 @@
*/
static struct vmbus_channel *alloc_channel(void)
{
- static atomic_t chan_num = ATOMIC_INIT(0);
struct vmbus_channel *channel;
channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
if (!channel)
return NULL;
- channel->id = atomic_inc_return(&chan_num);
channel->acquire_ring_lock = true;
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
@@ -303,16 +324,32 @@
vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}
+void hv_event_tasklet_disable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_disable(tasklet);
+}
+
+void hv_event_tasklet_enable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_enable(tasklet);
+
+ /* In case there is any pending event */
+ tasklet_schedule(tasklet);
+}
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
unsigned long flags;
struct vmbus_channel *primary_channel;
- vmbus_release_relid(relid);
-
BUG_ON(!channel->rescind);
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ hv_event_tasklet_disable(channel);
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -321,6 +358,7 @@
percpu_channel_deq(channel);
put_cpu();
}
+ hv_event_tasklet_enable(channel);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
@@ -338,8 +376,11 @@
* We need to free the bit for init_vp_index() to work in the case
* of sub-channel, when we reload drivers like hv_netvsc.
*/
- cpumask_clear_cpu(channel->target_cpu,
- &primary_channel->alloced_cpus_in_node);
+ if (channel->affinity_policy == HV_LOCALIZED)
+ cpumask_clear_cpu(channel->target_cpu,
+ &primary_channel->alloced_cpus_in_node);
+
+ vmbus_release_relid(relid);
free_channel(channel);
}
@@ -405,10 +446,13 @@
goto err_free_chan;
}
- dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+ dev_type = hv_get_dev_type(newchannel);
+ if (dev_type == HV_NIC)
+ set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);
init_vp_index(newchannel, dev_type);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -418,6 +462,7 @@
percpu_channel_enq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
/*
* This state is used to indicate a successful open
@@ -463,12 +508,11 @@
return;
err_deq_chan:
- vmbus_release_relid(newchannel->offermsg.child_relid);
-
mutex_lock(&vmbus_connection.channel_mutex);
list_del(&newchannel->listentry);
mutex_unlock(&vmbus_connection.channel_mutex);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -477,6 +521,9 @@
percpu_channel_deq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
+
+ vmbus_release_relid(newchannel->offermsg.child_relid);
err_free_chan:
free_channel(newchannel);
@@ -522,17 +569,17 @@
}
/*
- * We distribute primary channels evenly across all the available
- * NUMA nodes and within the assigned NUMA node we will assign the
- * first available CPU to the primary channel.
- * The sub-channels will be assigned to the CPUs available in the
- * NUMA node evenly.
+ * Based on the channel affinity policy, we will assign the NUMA
+ * nodes.
*/
- if (!primary) {
+
+ if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
while (true) {
next_node = next_numa_node_id++;
- if (next_node == nr_node_ids)
+ if (next_node == nr_node_ids) {
next_node = next_numa_node_id = 0;
+ continue;
+ }
if (cpumask_empty(cpumask_of_node(next_node)))
continue;
break;
@@ -556,15 +603,17 @@
cur_cpu = -1;
- /*
- * Normally Hyper-V host doesn't create more subchannels than there
- * are VCPUs on the node but it is possible when not all present VCPUs
- * on the node are initialized by guest. Clear the alloced_cpus_in_node
- * to start over.
- */
- if (cpumask_equal(&primary->alloced_cpus_in_node,
- cpumask_of_node(primary->numa_node)))
- cpumask_clear(&primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * Normally Hyper-V host doesn't create more subchannels
+ * than there are VCPUs on the node but it is possible when not
+ * all present VCPUs on the node are initialized by guest.
+ * Clear the alloced_cpus_in_node to start over.
+ */
+ if (cpumask_equal(&primary->alloced_cpus_in_node,
+ cpumask_of_node(primary->numa_node)))
+ cpumask_clear(&primary->alloced_cpus_in_node);
+ }
while (true) {
cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -575,17 +624,24 @@
continue;
}
- /*
- * NOTE: in the case of sub-channel, we clear the sub-channel
- * related bit(s) in primary->alloced_cpus_in_node in
- * hv_process_channel_removal(), so when we reload drivers
- * like hv_netvsc in SMP guest, here we're able to re-allocate
- * bit from primary->alloced_cpus_in_node.
- */
- if (!cpumask_test_cpu(cur_cpu,
- &primary->alloced_cpus_in_node)) {
- cpumask_set_cpu(cur_cpu,
- &primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * NOTE: in the case of sub-channel, we clear the
+ * sub-channel related bit(s) in
+ * primary->alloced_cpus_in_node in
+ * hv_process_channel_removal(), so when we
+ * reload drivers like hv_netvsc in SMP guest, here
+ * we're able to re-allocate
+ * bit from primary->alloced_cpus_in_node.
+ */
+ if (!cpumask_test_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node)) {
+ cpumask_set_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node);
+ cpumask_set_cpu(cur_cpu, alloced_mask);
+ break;
+ }
+ } else {
cpumask_set_cpu(cur_cpu, alloced_mask);
break;
}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index fcf8a02..78e6368 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -439,7 +439,7 @@
union hv_connection_id conn_id;
int ret = 0;
int retries = 0;
- u32 msec = 1;
+ u32 usec = 1;
conn_id.asu32 = 0;
conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
@@ -472,9 +472,9 @@
}
retries++;
- msleep(msec);
- if (msec < 2048)
- msec *= 2;
+ udelay(usec);
+ if (usec < 2048)
+ usec *= 2;
}
return ret;
}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1c086b..60dbd6c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -278,7 +278,7 @@
*
* This routine is called normally during driver unloading or exiting.
*/
-void hv_cleanup(void)
+void hv_cleanup(bool crash)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
@@ -288,7 +288,8 @@
if (hv_context.hypercall_page) {
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- vfree(hv_context.hypercall_page);
+ if (!crash)
+ vfree(hv_context.hypercall_page);
hv_context.hypercall_page = NULL;
}
@@ -308,7 +309,8 @@
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
- vfree(hv_context.tsc_page);
+ if (!crash)
+ vfree(hv_context.tsc_page);
hv_context.tsc_page = NULL;
}
#endif
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index df35fb7..fdf8da9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -430,16 +430,27 @@
* currently hot added. We hot add in multiples of 128M
* chunks; it is possible that we may not be able to bring
* online all the pages in the region. The range
- * covered_end_pfn defines the pages that can
+ * covered_start_pfn:covered_end_pfn defines the pages that can
* be brough online.
*/
struct hv_hotadd_state {
struct list_head list;
unsigned long start_pfn;
+ unsigned long covered_start_pfn;
unsigned long covered_end_pfn;
unsigned long ha_end_pfn;
unsigned long end_pfn;
+ /*
+ * A list of gaps.
+ */
+ struct list_head gap_list;
+};
+
+struct hv_hotadd_gap {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
};
struct balloon_state {
@@ -536,7 +547,11 @@
*/
struct task_struct *thread;
- struct mutex ha_region_mutex;
+ /*
+ * Protects ha_region_list, num_pages_onlined counter and individual
+ * regions from ha_region_list.
+ */
+ spinlock_t ha_lock;
/*
* A list of hot-add regions.
@@ -560,18 +575,14 @@
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
+ unsigned long flags;
switch (val) {
- case MEM_GOING_ONLINE:
- mutex_lock(&dm_device.ha_region_mutex);
- break;
-
case MEM_ONLINE:
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined += mem->nr_pages;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
case MEM_CANCEL_ONLINE:
- if (val == MEM_ONLINE ||
- mutex_is_locked(&dm_device.ha_region_mutex))
- mutex_unlock(&dm_device.ha_region_mutex);
if (dm_device.ha_waiting) {
dm_device.ha_waiting = false;
complete(&dm_device.ol_waitevent);
@@ -579,10 +590,11 @@
break;
case MEM_OFFLINE:
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined -= mem->nr_pages;
- mutex_unlock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
+ case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_OFFLINE:
break;
@@ -595,18 +607,46 @@
.priority = 0
};
+/* Check if the particular page is backed and can be onlined and online it. */
+static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
+{
+ unsigned long cur_start_pgp;
+ unsigned long cur_end_pgp;
+ struct hv_hotadd_gap *gap;
-static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
+ cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+ /* The page is not backed. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ return;
+
+ /* Check for gaps. */
+ list_for_each_entry(gap, &has->gap_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(gap->start_pfn);
+ cur_end_pgp = (unsigned long)
+ pfn_to_page(gap->end_pfn);
+ if (((unsigned long)pg >= cur_start_pgp) &&
+ ((unsigned long)pg < cur_end_pgp)) {
+ return;
+ }
+ }
+
+ /* This frame is currently backed; online the page. */
+ __online_page_set_limits(pg);
+ __online_page_increment_counters(pg);
+ __online_page_free(pg);
+}
+
+static void hv_bring_pgs_online(struct hv_hotadd_state *has,
+ unsigned long start_pfn, unsigned long size)
{
int i;
- for (i = 0; i < size; i++) {
- struct page *pg;
- pg = pfn_to_page(start_pfn + i);
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ for (i = 0; i < size; i++)
+ hv_page_online_one(has, pfn_to_page(start_pfn + i));
}
static void hv_mem_hot_add(unsigned long start, unsigned long size,
@@ -618,9 +658,12 @@
unsigned long start_pfn;
unsigned long processed_pfn;
unsigned long total_pfn = pfn_count;
+ unsigned long flags;
for (i = 0; i < (size/HA_CHUNK); i++) {
start_pfn = start + (i * HA_CHUNK);
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn += HA_CHUNK;
if (total_pfn > HA_CHUNK) {
@@ -632,11 +675,11 @@
}
has->covered_end_pfn += processed_pfn;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
init_completion(&dm_device.ol_waitevent);
- dm_device.ha_waiting = true;
+ dm_device.ha_waiting = !memhp_auto_online;
- mutex_unlock(&dm_device.ha_region_mutex);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
(HA_CHUNK << PAGE_SHIFT));
@@ -653,20 +696,23 @@
*/
do_hot_add = false;
}
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
}
/*
- * Wait for the memory block to be onlined.
- * Since the hot add has succeeded, it is ok to
- * proceed even if the pages in the hot added region
- * have not been "onlined" within the allowed time.
+ * Wait for the memory block to be onlined when memory onlining
+ * is done outside of kernel (memhp_auto_online). Since the hot
+ * add has succeeded, it is ok to proceed even if the pages in
+ * the hot added region have not been "onlined" within the
+ * allowed time.
*/
- wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
- mutex_lock(&dm_device.ha_region_mutex);
+ if (dm_device.ha_waiting)
+ wait_for_completion_timeout(&dm_device.ol_waitevent,
+ 5*HZ);
post_status(&dm_device);
}
@@ -675,47 +721,64 @@
static void hv_online_page(struct page *pg)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long cur_start_pgp;
unsigned long cur_end_pgp;
+ unsigned long flags;
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
- cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(has->start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
- if (((unsigned long)pg >= cur_start_pgp) &&
- ((unsigned long)pg < cur_end_pgp)) {
- /*
- * This frame is currently backed; online the
- * page.
- */
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ /* The page belongs to a different HAS. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ continue;
+
+ hv_page_online_one(has, pg);
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
+ struct hv_hotadd_gap *gap;
unsigned long residual, new_inc;
+ int ret = 0;
+ unsigned long flags;
- if (list_empty(&dm_device.ha_region_list))
- return false;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
*/
if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
continue;
+
+ /*
+ * If the current start pfn is not where the covered_end
+ * is, create a gap and update covered_end_pfn.
+ */
+ if (has->covered_end_pfn != start_pfn) {
+ gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+ if (!gap) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ INIT_LIST_HEAD(&gap->list);
+ gap->start_pfn = has->covered_end_pfn;
+ gap->end_pfn = start_pfn;
+ list_add_tail(&gap->list, &has->gap_list);
+
+ has->covered_end_pfn = start_pfn;
+ }
+
/*
* If the current hot add-request extends beyond
* our current limit; extend it.
@@ -732,19 +795,12 @@
has->end_pfn += new_inc;
}
- /*
- * If the current start pfn is not where the covered_end
- * is, update it.
- */
-
- if (has->covered_end_pfn != start_pfn)
- has->covered_end_pfn = start_pfn;
-
- return true;
-
+ ret = 1;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return false;
+ return ret;
}
static unsigned long handle_pg_range(unsigned long pg_start,
@@ -753,17 +809,13 @@
unsigned long start_pfn = pg_start;
unsigned long pfn_cnt = pg_count;
unsigned long size;
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long pgs_ol = 0;
unsigned long old_covered_state;
+ unsigned long res = 0, flags;
- if (list_empty(&dm_device.ha_region_list))
- return 0;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
@@ -783,6 +835,8 @@
if (pgs_ol > pfn_cnt)
pgs_ol = pfn_cnt;
+ has->covered_end_pfn += pgs_ol;
+ pfn_cnt -= pgs_ol;
/*
* Check if the corresponding memory block is already
* online by checking its last previously backed page.
@@ -791,10 +845,8 @@
*/
if (start_pfn > has->start_pfn &&
!PageReserved(pfn_to_page(start_pfn - 1)))
- hv_bring_pgs_online(start_pfn, pgs_ol);
+ hv_bring_pgs_online(has, start_pfn, pgs_ol);
- has->covered_end_pfn += pgs_ol;
- pfn_cnt -= pgs_ol;
}
if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
@@ -813,17 +865,20 @@
} else {
pfn_cnt = size;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
}
/*
* If we managed to online any pages that were given to us,
* we declare success.
*/
- return has->covered_end_pfn - old_covered_state;
-
+ res = has->covered_end_pfn - old_covered_state;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return 0;
+ return res;
}
static unsigned long process_hot_add(unsigned long pg_start,
@@ -832,13 +887,20 @@
unsigned long rg_size)
{
struct hv_hotadd_state *ha_region = NULL;
+ int covered;
+ unsigned long flags;
if (pfn_cnt == 0)
return 0;
- if (!dm_device.host_specified_ha_region)
- if (pfn_covered(pg_start, pfn_cnt))
+ if (!dm_device.host_specified_ha_region) {
+ covered = pfn_covered(pg_start, pfn_cnt);
+ if (covered < 0)
+ return 0;
+
+ if (covered)
goto do_pg_range;
+ }
/*
* If the host has specified a hot-add range; deal with it first.
@@ -850,12 +912,17 @@
return 0;
INIT_LIST_HEAD(&ha_region->list);
+ INIT_LIST_HEAD(&ha_region->gap_list);
- list_add_tail(&ha_region->list, &dm_device.ha_region_list);
ha_region->start_pfn = rg_start;
ha_region->ha_end_pfn = rg_start;
+ ha_region->covered_start_pfn = pg_start;
ha_region->covered_end_pfn = pg_start;
ha_region->end_pfn = rg_start + rg_size;
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
do_pg_range:
@@ -882,7 +949,6 @@
resp.hdr.size = sizeof(struct dm_hot_add_response);
#ifdef CONFIG_MEMORY_HOTPLUG
- mutex_lock(&dm_device.ha_region_mutex);
pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -916,7 +982,6 @@
rg_start, rg_sz);
dm->num_pages_added += resp.page_count;
- mutex_unlock(&dm_device.ha_region_mutex);
#endif
/*
* The result field of the response structure has the
@@ -1010,7 +1075,6 @@
static void post_status(struct hv_dynmem_device *dm)
{
struct dm_status status;
- struct sysinfo val;
unsigned long now = jiffies;
unsigned long last_post = last_post_time;
@@ -1022,7 +1086,6 @@
if (!time_after(now, (last_post_time + HZ)))
return;
- si_meminfo(&val);
memset(&status, 0, sizeof(struct dm_status));
status.hdr.type = DM_STATUS_REPORT;
status.hdr.size = sizeof(struct dm_status);
@@ -1038,7 +1101,7 @@
* num_pages_onlined) as committed to the host, otherwise it can try
* asking us to balloon them out.
*/
- status.num_avail = val.freeram;
+ status.num_avail = si_mem_available();
status.num_committed = vm_memory_committed() +
dm->num_pages_ballooned +
(dm->num_pages_added > dm->num_pages_onlined ?
@@ -1144,7 +1207,7 @@
int ret;
bool done = false;
int i;
- struct sysinfo val;
+ long avail_pages;
unsigned long floor;
/* The host balloons pages in 2M granularity. */
@@ -1156,12 +1219,12 @@
*/
alloc_unit = 512;
- si_meminfo(&val);
+ avail_pages = si_mem_available();
floor = compute_balloon_floor();
/* Refuse to balloon below the floor, keep the 2M granularity. */
- if (val.freeram < num_pages || val.freeram - num_pages < floor) {
- num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+ if (avail_pages < num_pages || avail_pages - num_pages < floor) {
+ num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
num_pages -= num_pages % PAGES_IN_2M;
}
@@ -1172,7 +1235,6 @@
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
-
num_pages -= num_ballooned;
num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
bl_resp, alloc_unit);
@@ -1461,7 +1523,7 @@
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
- mutex_init(&dm_device.ha_region_mutex);
+ spin_lock_init(&dm_device.ha_lock);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.host_specified_ha_region = false;
@@ -1580,8 +1642,9 @@
static int balloon_remove(struct hv_device *dev)
{
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
- struct list_head *cur, *tmp;
- struct hv_hotadd_state *has;
+ struct hv_hotadd_state *has, *tmp;
+ struct hv_hotadd_gap *gap, *tmp_gap;
+ unsigned long flags;
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
@@ -1596,11 +1659,16 @@
restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
#endif
- list_for_each_safe(cur, tmp, &dm->ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
+ list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
+ list_del(&gap->list);
+ kfree(gap);
+ }
list_del(&has->list);
kfree(has);
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
return 0;
}
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23c7079..8b2ba98 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -83,6 +83,12 @@
hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
}
+static void fcopy_register_done(void)
+{
+ pr_debug("FCP: userspace daemon registered\n");
+ hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+}
+
static int fcopy_handle_handshake(u32 version)
{
u32 our_ver = FCOPY_CURRENT_VERSION;
@@ -94,7 +100,8 @@
break;
case FCOPY_VERSION_1:
/* Daemon expects us to reply with our own version */
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ fcopy_register_done))
return -EFAULT;
dm_reg_value = version;
break;
@@ -107,8 +114,7 @@
*/
return -EINVAL;
}
- pr_debug("FCP: userspace daemon ver. %d registered\n", version);
- hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+ pr_debug("FCP: userspace daemon ver. %d connected\n", version);
return 0;
}
@@ -161,7 +167,7 @@
}
fcopy_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, out_src, out_len);
+ rc = hvutil_transport_send(hvt, out_src, out_len, NULL);
if (rc) {
pr_debug("FCP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&fcopy_timeout_work)) {
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index cb1a916..5e1fdc8 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -102,6 +102,17 @@
hv_kvp_onchannelcallback(channel);
}
+static void kvp_register_done(void)
+{
+ /*
+ * If we're still negotiating with the host cancel the timeout
+ * work to not poll the channel twice.
+ */
+ pr_debug("KVP: userspace daemon registered\n");
+ cancel_delayed_work_sync(&kvp_host_handshake_work);
+ hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
+}
+
static void
kvp_register(int reg_value)
{
@@ -116,7 +127,8 @@
kvp_msg->kvp_hdr.operation = reg_value;
strcpy(version, HV_DRV_VERSION);
- hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg));
+ hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
+ kvp_register_done);
kfree(kvp_msg);
}
}
@@ -158,17 +170,10 @@
/*
* We have a compatible daemon; complete the handshake.
*/
- pr_debug("KVP: userspace daemon ver. %d registered\n",
- KVP_OP_REGISTER);
+ pr_debug("KVP: userspace daemon ver. %d connected\n",
+ msg->kvp_hdr.operation);
kvp_register(dm_reg_value);
- /*
- * If we're still negotiating with the host cancel the timeout
- * work to not poll the channel twice.
- */
- cancel_delayed_work_sync(&kvp_host_handshake_work);
- hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
-
return 0;
}
@@ -455,7 +460,7 @@
}
kvp_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, message, sizeof(*message));
+ rc = hvutil_transport_send(hvt, message, sizeof(*message), NULL);
if (rc) {
pr_debug("KVP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&kvp_timeout_work)) {
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 3fba14e..a670713 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -67,11 +67,11 @@
static __u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static void vss_send_op(struct work_struct *dummy);
static void vss_timeout_func(struct work_struct *dummy);
+static void vss_handle_request(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
-static DECLARE_WORK(vss_send_op_work, vss_send_op);
+static DECLARE_WORK(vss_handle_request_work, vss_handle_request);
static void vss_poll_wrapper(void *channel)
{
@@ -95,6 +95,12 @@
hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
}
+static void vss_register_done(void)
+{
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+ pr_debug("VSS: userspace daemon registered\n");
+}
+
static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
{
u32 our_ver = VSS_OP_REGISTER1;
@@ -105,16 +111,16 @@
dm_reg_value = VSS_OP_REGISTER;
break;
case VSS_OP_REGISTER1:
- /* Daemon expects us to reply with our own version*/
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ /* Daemon expects us to reply with our own version */
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ vss_register_done))
return -EFAULT;
dm_reg_value = VSS_OP_REGISTER1;
break;
default:
return -EINVAL;
}
- hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
- pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value);
+ pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
return 0;
}
@@ -136,6 +142,11 @@
return vss_handle_handshake(vss_msg);
} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
vss_transaction.state = HVUTIL_USERSPACE_RECV;
+
+ if (vss_msg->vss_hdr.operation == VSS_OP_HOT_BACKUP)
+ vss_transaction.msg->vss_cf.flags =
+ VSS_HBU_NO_AUTO_RECOVERY;
+
if (cancel_delayed_work_sync(&vss_timeout_work)) {
vss_respond_to_host(vss_msg->error);
/* Transaction is finished, reset the state. */
@@ -150,8 +161,7 @@
return 0;
}
-
-static void vss_send_op(struct work_struct *dummy)
+static void vss_send_op(void)
{
int op = vss_transaction.msg->vss_hdr.operation;
int rc;
@@ -168,7 +178,10 @@
vss_msg->vss_hdr.operation = op;
vss_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg));
+
+ schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
+
+ rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
if (rc) {
pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&vss_timeout_work)) {
@@ -182,6 +195,38 @@
return;
}
+static void vss_handle_request(struct work_struct *dummy)
+{
+ switch (vss_transaction.msg->vss_hdr.operation) {
+ /*
+ * Initiate a "freeze/thaw" operation in the guest.
+ * We respond to the host once the operation is complete.
+ *
+ * We send the message to the user space daemon and the operation is
+ * performed in the daemon.
+ */
+ case VSS_OP_THAW:
+ case VSS_OP_FREEZE:
+ case VSS_OP_HOT_BACKUP:
+ if (vss_transaction.state < HVUTIL_READY) {
+ /* Userspace is not registered yet */
+ vss_respond_to_host(HV_E_FAIL);
+ return;
+ }
+ vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
+ vss_send_op();
+ return;
+ case VSS_OP_GET_DM_INFO:
+ vss_transaction.msg->dm_info.flags = 0;
+ break;
+ default:
+ break;
+ }
+
+ vss_respond_to_host(0);
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+}
+
/*
* Send a response back to the host.
*/
@@ -266,48 +311,8 @@
vss_transaction.recv_req_id = requestid;
vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
- switch (vss_msg->vss_hdr.operation) {
- /*
- * Initiate a "freeze/thaw"
- * operation in the guest.
- * We respond to the host once
- * the operation is complete.
- *
- * We send the message to the
- * user space daemon and the
- * operation is performed in
- * the daemon.
- */
- case VSS_OP_FREEZE:
- case VSS_OP_THAW:
- if (vss_transaction.state < HVUTIL_READY) {
- /* Userspace is not registered yet */
- vss_respond_to_host(HV_E_FAIL);
- return;
- }
- vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
- schedule_work(&vss_send_op_work);
- schedule_delayed_work(&vss_timeout_work,
- VSS_USERSPACE_TIMEOUT);
- return;
-
- case VSS_OP_HOT_BACKUP:
- vss_msg->vss_cf.flags =
- VSS_HBU_NO_AUTO_RECOVERY;
- vss_respond_to_host(0);
- return;
-
- case VSS_OP_GET_DM_INFO:
- vss_msg->dm_info.flags = 0;
- vss_respond_to_host(0);
- return;
-
- default:
- vss_respond_to_host(0);
- return;
-
- }
-
+ schedule_work(&vss_handle_request_work);
+ return;
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -358,6 +363,6 @@
{
vss_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&vss_timeout_work);
- cancel_work_sync(&vss_send_op_work);
+ cancel_work_sync(&vss_handle_request_work);
hvutil_transport_destroy(hvt);
}
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5acaa2..4aa3cb6 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -34,22 +34,25 @@
#define SD_MINOR 0
#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
-#define SD_WS2008_MAJOR 1
-#define SD_WS2008_VERSION (SD_WS2008_MAJOR << 16 | SD_MINOR)
+#define SD_MAJOR_1 1
+#define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
-#define TS_MAJOR 3
+#define TS_MAJOR 4
#define TS_MINOR 0
#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
-#define TS_WS2008_MAJOR 1
-#define TS_WS2008_VERSION (TS_WS2008_MAJOR << 16 | TS_MINOR)
+#define TS_MAJOR_1 1
+#define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
+
+#define TS_MAJOR_3 3
+#define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
#define HB_MAJOR 3
-#define HB_MINOR 0
+#define HB_MINOR 0
#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
-#define HB_WS2008_MAJOR 1
-#define HB_WS2008_VERSION (HB_WS2008_MAJOR << 16 | HB_MINOR)
+#define HB_MAJOR_1 1
+#define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
static int sd_srv_version;
static int ts_srv_version;
@@ -61,9 +64,14 @@
.util_cb = shutdown_onchannelcallback,
};
+static int hv_timesync_init(struct hv_util_service *srv);
+static void hv_timesync_deinit(void);
+
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
.util_cb = timesync_onchannelcallback,
+ .util_init = hv_timesync_init,
+ .util_deinit = hv_timesync_deinit,
};
static void heartbeat_onchannelcallback(void *context);
@@ -161,35 +169,43 @@
}
/*
- * Set guest time to host UTC time.
- */
-static inline void do_adj_guesttime(u64 hosttime)
-{
- s64 host_tns;
- struct timespec host_ts;
-
- host_tns = (hosttime - WLTIMEDELTA) * 100;
- host_ts = ns_to_timespec(host_tns);
-
- do_settimeofday(&host_ts);
-}
-
-/*
* Set the host time in a process context.
*/
struct adj_time_work {
struct work_struct work;
u64 host_time;
+ u64 ref_time;
+ u8 flags;
};
static void hv_set_host_time(struct work_struct *work)
{
struct adj_time_work *wrk;
+ s64 host_tns;
+ u64 newtime;
+ struct timespec host_ts;
wrk = container_of(work, struct adj_time_work, work);
- do_adj_guesttime(wrk->host_time);
- kfree(wrk);
+
+ newtime = wrk->host_time;
+ if (ts_srv_version > TS_VERSION_3) {
+ /*
+ * Some latency has been introduced since Hyper-V generated
+ * its time sample. Take that latency into account before
+ * using TSC reference time sample from Hyper-V.
+ *
+ * This sample is given by TimeSync v4 and above hosts.
+ */
+ u64 current_tick;
+
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+ newtime += (current_tick - wrk->ref_time);
+ }
+ host_tns = (newtime - WLTIMEDELTA) * 100;
+ host_ts = ns_to_timespec(host_tns);
+
+ do_settimeofday(&host_ts);
}
/*
@@ -198,33 +214,31 @@
* ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
* After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
* message after the timesync channel is opened. Since the hv_utils module is
- * loaded after hv_vmbus, the first message is usually missed. The other
- * thing is, systime is automatically set to emulated hardware clock which may
- * not be UTC time or in the same time zone. So, to override these effects, we
- * use the first 50 time samples for initial system time setting.
+ * loaded after hv_vmbus, the first message is usually missed. This bit is
+ * considered a hard request to discipline the clock.
+ *
+ * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
+ * typically used as a hint to the guest. The guest is under no obligation
+ * to discipline the clock.
*/
-static inline void adj_guesttime(u64 hosttime, u8 flags)
+static struct adj_time_work wrk;
+static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 flags)
{
- struct adj_time_work *wrk;
- static s32 scnt = 50;
- wrk = kmalloc(sizeof(struct adj_time_work), GFP_ATOMIC);
- if (wrk == NULL)
+ /*
+ * This check is safe since we are executing in the
+ * interrupt context and time synch messages arre always
+ * delivered on the same CPU.
+ */
+ if (work_pending(&wrk.work))
return;
- wrk->host_time = hosttime;
- if ((flags & ICTIMESYNCFLAG_SYNC) != 0) {
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- return;
+ wrk.host_time = hosttime;
+ wrk.ref_time = reftime;
+ wrk.flags = flags;
+ if ((flags & (ICTIMESYNCFLAG_SYNC | ICTIMESYNCFLAG_SAMPLE)) != 0) {
+ schedule_work(&wrk.work);
}
-
- if ((flags & ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) {
- scnt--;
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- } else
- kfree(wrk);
}
/*
@@ -237,6 +251,7 @@
u64 requestid;
struct icmsg_hdr *icmsghdrp;
struct ictimesync_data *timedatap;
+ struct ictimesync_ref_data *refdata;
u8 *time_txf_buf = util_timesynch.recv_buffer;
struct icmsg_negotiate *negop = NULL;
@@ -252,11 +267,27 @@
time_txf_buf,
util_fw_version,
ts_srv_version);
+ pr_info("Using TimeSync version %d.%d\n",
+ ts_srv_version >> 16, ts_srv_version & 0xFFFF);
} else {
- timedatap = (struct ictimesync_data *)&time_txf_buf[
- sizeof(struct vmbuspipe_hdr) +
- sizeof(struct icmsg_hdr)];
- adj_guesttime(timedatap->parenttime, timedatap->flags);
+ if (ts_srv_version > TS_VERSION_3) {
+ refdata = (struct ictimesync_ref_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+
+ adj_guesttime(refdata->parenttime,
+ refdata->vmreferencetime,
+ refdata->flags);
+ } else {
+ timedatap = (struct ictimesync_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+ adj_guesttime(timedatap->parenttime,
+ 0,
+ timedatap->flags);
+ }
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -350,16 +381,21 @@
switch (vmbus_proto_version) {
case (VERSION_WS2008):
util_fw_version = UTIL_WS2K8_FW_VERSION;
- sd_srv_version = SD_WS2008_VERSION;
- ts_srv_version = TS_WS2008_VERSION;
- hb_srv_version = HB_WS2008_VERSION;
+ sd_srv_version = SD_VERSION_1;
+ ts_srv_version = TS_VERSION_1;
+ hb_srv_version = HB_VERSION_1;
break;
-
- default:
+ case(VERSION_WIN10):
util_fw_version = UTIL_FW_VERSION;
sd_srv_version = SD_VERSION;
ts_srv_version = TS_VERSION;
hb_srv_version = HB_VERSION;
+ break;
+ default:
+ util_fw_version = UTIL_FW_VERSION;
+ sd_srv_version = SD_VERSION;
+ ts_srv_version = TS_VERSION_3;
+ hb_srv_version = HB_VERSION;
}
ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
@@ -427,6 +463,17 @@
.remove = util_remove,
};
+static int hv_timesync_init(struct hv_util_service *srv)
+{
+ INIT_WORK(&wrk.work, hv_set_host_time);
+ return 0;
+}
+
+static void hv_timesync_deinit(void)
+{
+ cancel_work_sync(&wrk.work);
+}
+
static int __init init_hyperv_utils(void)
{
pr_info("Registering HyperV Utility Driver\n");
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 9a9983f..c235a95 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -72,6 +72,10 @@
hvt->outmsg = NULL;
hvt->outmsg_len = 0;
+ if (hvt->on_read)
+ hvt->on_read();
+ hvt->on_read = NULL;
+
out_unlock:
mutex_unlock(&hvt->lock);
return ret;
@@ -219,7 +223,8 @@
mutex_unlock(&hvt->lock);
}
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void))
{
struct cn_msg *cn_msg;
int ret = 0;
@@ -237,6 +242,13 @@
memcpy(cn_msg->data, msg, len);
ret = cn_netlink_send(cn_msg, 0, 0, GFP_ATOMIC);
kfree(cn_msg);
+ /*
+ * We don't know when netlink messages are delivered but unlike
+ * in CHARDEV mode we're not blocked and we can send next
+ * messages right away.
+ */
+ if (on_read_cb)
+ on_read_cb();
return ret;
}
/* HVUTIL_TRANSPORT_CHARDEV */
@@ -255,6 +267,7 @@
if (hvt->outmsg) {
memcpy(hvt->outmsg, msg, len);
hvt->outmsg_len = len;
+ hvt->on_read = on_read_cb;
wake_up_interruptible(&hvt->outmsg_q);
} else
ret = -ENOMEM;
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index 06254a1..d98f522 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -36,6 +36,7 @@
struct list_head list; /* hvt_list */
int (*on_msg)(void *, int); /* callback on new user message */
void (*on_reset)(void); /* callback when userspace drops */
+ void (*on_read)(void); /* callback on message read */
u8 *outmsg; /* message to the userspace */
int outmsg_len; /* its length */
wait_queue_head_t outmsg_q; /* poll/read wait queue */
@@ -46,7 +47,8 @@
u32 cn_idx, u32 cn_val,
int (*on_msg)(void *, int),
void (*on_reset)(void));
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len);
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void));
void hvutil_transport_destroy(struct hvutil_transport *hvt);
#endif /* _HV_UTILS_TRANSPORT_H */
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 718b5c7..a5b4442 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -495,7 +495,7 @@
extern int hv_init(void);
-extern void hv_cleanup(void);
+extern void hv_cleanup(bool crash);
extern int hv_post_message(union hv_connection_id connection_id,
enum hv_message_type message_type,
@@ -522,14 +522,15 @@
/* Interface */
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer,
- u32 buflen);
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+ struct page *pages, u32 pagecnt);
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
struct kvec *kv_list,
- u32 kv_count, bool *signal, bool lock);
+ u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy);
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
void *buffer, u32 buflen, u32 *buffer_actual_len,
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index fe586bf..08043da 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -27,6 +27,8 @@
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include "hyperv_vmbus.h"
@@ -66,12 +68,20 @@
* arrived.
*/
-static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
+static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
+ enum hv_signal_policy policy)
{
virt_mb();
if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
return false;
+ /*
+ * When the client wants to control signaling,
+ * we only honour the host interrupt mask.
+ */
+ if (policy == HV_SIGNAL_POLICY_EXPLICIT)
+ return true;
+
/* check interrupt_mask before read_index */
virt_rmb();
/*
@@ -162,18 +172,7 @@
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
-
- /* wrap-around detected at the src */
- if (destlen > ring_buffer_size - start_read_offset) {
- frag_len = ring_buffer_size - start_read_offset;
-
- memcpy(dest, ring_buffer + start_read_offset, frag_len);
- memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
- } else
-
- memcpy(dest, ring_buffer + start_read_offset, destlen);
-
+ memcpy(dest, ring_buffer + start_read_offset, destlen);
start_read_offset += destlen;
start_read_offset %= ring_buffer_size;
@@ -194,15 +193,8 @@
{
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
- /* wrap-around detected! */
- if (srclen > ring_buffer_size - start_write_offset) {
- frag_len = ring_buffer_size - start_write_offset;
- memcpy(ring_buffer + start_write_offset, src, frag_len);
- memcpy(ring_buffer, src + frag_len, srclen - frag_len);
- } else
- memcpy(ring_buffer + start_write_offset, src, srclen);
+ memcpy(ring_buffer + start_write_offset, src, srclen);
start_write_offset += srclen;
start_write_offset %= ring_buffer_size;
@@ -235,22 +227,46 @@
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- void *buffer, u32 buflen)
+ struct page *pages, u32 page_cnt)
{
- if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
- return -EINVAL;
+ int i;
+ struct page **pages_wraparound;
+
+ BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
- ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
+ /*
+ * First page holds struct hv_ring_buffer, do wraparound mapping for
+ * the rest.
+ */
+ pages_wraparound = kzalloc(sizeof(struct page *) * (page_cnt * 2 - 1),
+ GFP_KERNEL);
+ if (!pages_wraparound)
+ return -ENOMEM;
+
+ pages_wraparound[0] = pages;
+ for (i = 0; i < 2 * (page_cnt - 1); i++)
+ pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+ ring_info->ring_buffer = (struct hv_ring_buffer *)
+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+ kfree(pages_wraparound);
+
+
+ if (!ring_info->ring_buffer)
+ return -ENOMEM;
+
ring_info->ring_buffer->read_index =
ring_info->ring_buffer->write_index = 0;
/* Set the feature bit for enabling flow control. */
ring_info->ring_buffer->feature_bits.value = 1;
- ring_info->ring_size = buflen;
- ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
+ ring_info->ring_size = page_cnt << PAGE_SHIFT;
+ ring_info->ring_datasize = ring_info->ring_size -
+ sizeof(struct hv_ring_buffer);
spin_lock_init(&ring_info->ring_lock);
@@ -260,11 +276,13 @@
/* Cleanup the ring buffer. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
+ vunmap(ring_info->ring_buffer);
}
/* Write to the ring buffer. */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
- struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy)
{
int i = 0;
u32 bytes_avail_towrite;
@@ -326,7 +344,7 @@
if (lock)
spin_unlock_irqrestore(&outring_info->ring_lock, flags);
- *signal = hv_need_to_signal(old_write, outring_info);
+ *signal = hv_need_to_signal(old_write, outring_info, policy);
return 0;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e82f7e1..a259e18 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -105,8 +105,8 @@
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
-struct resource *hyperv_mmio;
-DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static struct resource *hyperv_mmio;
+static DEFINE_SEMAPHORE(hyperv_mmio_lock);
static int vmbus_exists(void)
{
@@ -874,7 +874,7 @@
bus_unregister(&hv_bus);
err_cleanup:
- hv_cleanup();
+ hv_cleanup(false);
return ret;
}
@@ -961,8 +961,8 @@
{
int ret = 0;
- dev_set_name(&child_device_obj->device, "vmbus_%d",
- child_device_obj->channel->id);
+ dev_set_name(&child_device_obj->device, "vmbus-%pUl",
+ child_device_obj->channel->offermsg.offer.if_instance.b);
child_device_obj->device.bus = &hv_bus;
child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1326,7 +1326,7 @@
vmbus_initiate_unload(false);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
- hv_cleanup();
+ hv_cleanup(false);
};
static void hv_crash_handler(struct pt_regs *regs)
@@ -1338,7 +1338,7 @@
* for kdump.
*/
hv_synic_cleanup(NULL);
- hv_cleanup();
+ hv_cleanup(true);
};
static int __init hv_acpi_init(void)
@@ -1398,7 +1398,7 @@
&hyperv_panic_block);
}
bus_unregister(&hv_bus);
- hv_cleanup();
+ hv_cleanup(false);
for_each_online_cpu(cpu) {
tasklet_kill(hv_context.event_dpc[cpu]);
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index acf9c03..34704b0 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -36,6 +37,7 @@
#include <linux/io.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/smp.h>
#include <linux/i8k.h>
@@ -134,11 +136,11 @@
/*
* Call the System Management Mode BIOS. Code provided by Jonathan Buzzard.
*/
-static int i8k_smm(struct smm_regs *regs)
+static int i8k_smm_func(void *par)
{
int rc;
+ struct smm_regs *regs = par;
int eax = regs->eax;
- cpumask_var_t old_mask;
#ifdef DEBUG
int ebx = regs->ebx;
@@ -149,16 +151,8 @@
#endif
/* SMM requires CPU 0 */
- if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_copy(old_mask, ¤t->cpus_allowed);
- rc = set_cpus_allowed_ptr(current, cpumask_of(0));
- if (rc)
- goto out;
- if (smp_processor_id() != 0) {
- rc = -EBUSY;
- goto out;
- }
+ if (smp_processor_id() != 0)
+ return -EBUSY;
#if defined(CONFIG_X86_64)
asm volatile("pushq %%rax\n\t"
@@ -216,10 +210,6 @@
if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax)
rc = -EINVAL;
-out:
- set_cpus_allowed_ptr(current, old_mask);
- free_cpumask_var(old_mask);
-
#ifdef DEBUG
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
@@ -232,6 +222,20 @@
}
/*
+ * Call the System Management Mode BIOS.
+ */
+static int i8k_smm(struct smm_regs *regs)
+{
+ int ret;
+
+ get_online_cpus();
+ ret = smp_call_on_cpu(0, i8k_smm_func, regs, true);
+ put_online_cpus();
+
+ return ret;
+}
+
+/*
* Read the fan status.
*/
static int i8k_get_fan_status(int fan)
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 4d20b0b..d7325c65 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -184,8 +184,7 @@
if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- ETB_FFCR);
+ "timeout while waiting for completion of Manual Flush\n");
}
/* disable trace capture */
@@ -193,8 +192,7 @@
if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- ETB_FFCR);
+ "timeout while waiting for Formatter to Stop\n");
}
CS_LOCK(drvdata->base);
@@ -561,7 +559,7 @@
};
#define coresight_etb10_simple_func(name, offset) \
- coresight_simple_func(struct etb_drvdata, name, offset)
+ coresight_simple_func(struct etb_drvdata, NULL, name, offset)
coresight_etb10_simple_func(rdp, ETB_RAM_DEPTH_REG);
coresight_etb10_simple_func(sts, ETB_STATUS_REG);
@@ -638,7 +636,7 @@
struct coresight_platform_data *pdata = NULL;
struct etb_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
if (np) {
@@ -684,17 +682,13 @@
return -ENOMEM;
}
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
- desc->type = CORESIGHT_DEV_TYPE_SINK;
- desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
- desc->ops = &etb_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- desc->groups = coresight_etb_groups;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_SINK;
+ desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+ desc.ops = &etb_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_etb_groups;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev))
return PTR_ERR(drvdata->csdev);
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 755125f..2cd7c71 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -27,6 +27,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "coresight-etm-perf.h"
#include "coresight-priv.h"
static struct pmu etm_pmu;
@@ -71,14 +72,48 @@
static void etm_event_read(struct perf_event *event) {}
-static int etm_event_init(struct perf_event *event)
+static int etm_addr_filters_alloc(struct perf_event *event)
{
- if (event->attr.type != etm_pmu.type)
- return -ENOENT;
+ struct etm_filters *filters;
+ int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+ filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node);
+ if (!filters)
+ return -ENOMEM;
+
+ if (event->parent)
+ memcpy(filters, event->parent->hw.addr_filters,
+ sizeof(*filters));
+
+ event->hw.addr_filters = filters;
return 0;
}
+static void etm_event_destroy(struct perf_event *event)
+{
+ kfree(event->hw.addr_filters);
+ event->hw.addr_filters = NULL;
+}
+
+static int etm_event_init(struct perf_event *event)
+{
+ int ret = 0;
+
+ if (event->attr.type != etm_pmu.type) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = etm_addr_filters_alloc(event);
+ if (ret)
+ goto out;
+
+ event->destroy = etm_event_destroy;
+out:
+ return ret;
+}
+
static void free_event_data(struct work_struct *work)
{
int cpu;
@@ -100,7 +135,7 @@
}
for_each_cpu(cpu, mask) {
- if (event_data->path[cpu])
+ if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
coresight_release_path(event_data->path[cpu]);
}
@@ -185,7 +220,7 @@
* referenced later when the path is actually needed.
*/
event_data->path[cpu] = coresight_build_path(csdev);
- if (!event_data->path[cpu])
+ if (IS_ERR(event_data->path[cpu]))
goto err;
}
@@ -258,7 +293,7 @@
event->hw.state = 0;
/* Finally enable the tracer */
- if (source_ops(csdev)->enable(csdev, &event->attr, CS_MODE_PERF))
+ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
goto fail_end_stop;
out:
@@ -291,7 +326,7 @@
return;
/* stop tracer */
- source_ops(csdev)->disable(csdev);
+ source_ops(csdev)->disable(csdev, event);
/* tell the core */
event->hw.state = PERF_HES_STOPPED;
@@ -342,6 +377,87 @@
etm_event_stop(event, PERF_EF_UPDATE);
}
+static int etm_addr_filters_validate(struct list_head *filters)
+{
+ bool range = false, address = false;
+ int index = 0;
+ struct perf_addr_filter *filter;
+
+ list_for_each_entry(filter, filters, entry) {
+ /*
+ * No need to go further if there's no more
+ * room for filters.
+ */
+ if (++index > ETM_ADDR_CMP_MAX)
+ return -EOPNOTSUPP;
+
+ /*
+ * As taken from the struct perf_addr_filter documentation:
+ * @range: 1: range, 0: address
+ *
+ * At this time we don't allow range and start/stop filtering
+ * to cohabitate, they have to be mutually exclusive.
+ */
+ if ((filter->range == 1) && address)
+ return -EOPNOTSUPP;
+
+ if ((filter->range == 0) && range)
+ return -EOPNOTSUPP;
+
+ /*
+ * For range filtering, the second address in the address
+ * range comparator needs to be higher than the first.
+ * Invalid otherwise.
+ */
+ if (filter->range && filter->size == 0)
+ return -EINVAL;
+
+ /*
+ * Everything checks out with this filter, record what we've
+ * received before moving on to the next one.
+ */
+ if (filter->range)
+ range = true;
+ else
+ address = true;
+ }
+
+ return 0;
+}
+
+static void etm_addr_filters_sync(struct perf_event *event)
+{
+ struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+ unsigned long start, stop, *offs = event->addr_filters_offs;
+ struct etm_filters *filters = event->hw.addr_filters;
+ struct etm_filter *etm_filter;
+ struct perf_addr_filter *filter;
+ int i = 0;
+
+ list_for_each_entry(filter, &head->list, entry) {
+ start = filter->offset + offs[i];
+ stop = start + filter->size;
+ etm_filter = &filters->etm_filter[i];
+
+ if (filter->range == 1) {
+ etm_filter->start_addr = start;
+ etm_filter->stop_addr = stop;
+ etm_filter->type = ETM_ADDR_TYPE_RANGE;
+ } else {
+ if (filter->filter == 1) {
+ etm_filter->start_addr = start;
+ etm_filter->type = ETM_ADDR_TYPE_START;
+ } else {
+ etm_filter->stop_addr = stop;
+ etm_filter->type = ETM_ADDR_TYPE_STOP;
+ }
+ }
+ i++;
+ }
+
+ filters->nr_filters = i;
+}
+
int etm_perf_symlink(struct coresight_device *csdev, bool link)
{
char entry[sizeof("cpu9999999")];
@@ -371,18 +487,21 @@
{
int ret;
- etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE;
+ etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE;
- etm_pmu.attr_groups = etm_pmu_attr_groups;
- etm_pmu.task_ctx_nr = perf_sw_context;
- etm_pmu.read = etm_event_read;
- etm_pmu.event_init = etm_event_init;
- etm_pmu.setup_aux = etm_setup_aux;
- etm_pmu.free_aux = etm_free_aux;
- etm_pmu.start = etm_event_start;
- etm_pmu.stop = etm_event_stop;
- etm_pmu.add = etm_event_add;
- etm_pmu.del = etm_event_del;
+ etm_pmu.attr_groups = etm_pmu_attr_groups;
+ etm_pmu.task_ctx_nr = perf_sw_context;
+ etm_pmu.read = etm_event_read;
+ etm_pmu.event_init = etm_event_init;
+ etm_pmu.setup_aux = etm_setup_aux;
+ etm_pmu.free_aux = etm_free_aux;
+ etm_pmu.start = etm_event_start;
+ etm_pmu.stop = etm_event_stop;
+ etm_pmu.add = etm_event_add;
+ etm_pmu.del = etm_event_del;
+ etm_pmu.addr_filters_sync = etm_addr_filters_sync;
+ etm_pmu.addr_filters_validate = etm_addr_filters_validate;
+ etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX;
ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
if (ret == 0)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 87f5a13..3ffc9fe 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -18,8 +18,42 @@
#ifndef _CORESIGHT_ETM_PERF_H
#define _CORESIGHT_ETM_PERF_H
+#include "coresight-priv.h"
+
struct coresight_device;
+/*
+ * In both ETMv3 and v4 the maximum number of address comparator implentable
+ * is 8. The actual number is implementation specific and will be checked
+ * when filters are applied.
+ */
+#define ETM_ADDR_CMP_MAX 8
+
+/**
+ * struct etm_filter - single instruction range or start/stop configuration.
+ * @start_addr: The address to start tracing on.
+ * @stop_addr: The address to stop tracing on.
+ * @type: Is this a range or start/stop filter.
+ */
+struct etm_filter {
+ unsigned long start_addr;
+ unsigned long stop_addr;
+ enum etm_addr_type type;
+};
+
+/**
+ * struct etm_filters - set of filters for a session
+ * @etm_filter: All the filters for this session.
+ * @nr_filters: Number of filters
+ * @ssstatus: Status of the start/stop logic.
+ */
+struct etm_filters {
+ struct etm_filter etm_filter[ETM_ADDR_CMP_MAX];
+ unsigned int nr_filters;
+ bool ssstatus;
+};
+
+
#ifdef CONFIG_CORESIGHT
int etm_perf_symlink(struct coresight_device *csdev, bool link);
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
index 51597cb..4a18ee4 100644
--- a/drivers/hwtracing/coresight/coresight-etm.h
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -259,14 +259,6 @@
struct etm_config config;
};
-enum etm_addr_type {
- ETM_ADDR_TYPE_NONE,
- ETM_ADDR_TYPE_SINGLE,
- ETM_ADDR_TYPE_RANGE,
- ETM_ADDR_TYPE_START,
- ETM_ADDR_TYPE_STOP,
-};
-
static inline void etm_writel(struct etm_drvdata *drvdata,
u32 val, u32 off)
{
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
index 02d4b62..e9b0719 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -18,6 +18,7 @@
#include <linux/pm_runtime.h>
#include <linux/sysfs.h>
#include "coresight-etm.h"
+#include "coresight-priv.h"
static ssize_t nr_addr_cmp_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1222,7 +1223,7 @@
};
#define coresight_etm3x_simple_func(name, offset) \
- coresight_simple_func(struct etm_drvdata, name, offset)
+ coresight_simple_func(struct etm_drvdata, NULL, name, offset)
coresight_etm3x_simple_func(etmccr, ETMCCR);
coresight_etm3x_simple_func(etmccer, ETMCCER);
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 2de4cad..3fe368b 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -311,9 +311,10 @@
#define ETM3X_SUPPORTED_OPTIONS (ETMCR_CYC_ACC | ETMCR_TIMESTAMP_EN)
static int etm_parse_event_config(struct etm_drvdata *drvdata,
- struct perf_event_attr *attr)
+ struct perf_event *event)
{
struct etm_config *config = &drvdata->config;
+ struct perf_event_attr *attr = &event->attr;
if (!attr)
return -EINVAL;
@@ -459,7 +460,7 @@
}
static int etm_enable_perf(struct coresight_device *csdev,
- struct perf_event_attr *attr)
+ struct perf_event *event)
{
struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -467,7 +468,7 @@
return -EINVAL;
/* Configure the tracer based on the session's specifics */
- etm_parse_event_config(drvdata, attr);
+ etm_parse_event_config(drvdata, event);
/* And enable it */
etm_enable_hw(drvdata);
@@ -504,7 +505,7 @@
}
static int etm_enable(struct coresight_device *csdev,
- struct perf_event_attr *attr, u32 mode)
+ struct perf_event *event, u32 mode)
{
int ret;
u32 val;
@@ -521,7 +522,7 @@
ret = etm_enable_sysfs(csdev);
break;
case CS_MODE_PERF:
- ret = etm_enable_perf(csdev, attr);
+ ret = etm_enable_perf(csdev, event);
break;
default:
ret = -EINVAL;
@@ -601,7 +602,8 @@
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
-static void etm_disable(struct coresight_device *csdev)
+static void etm_disable(struct coresight_device *csdev,
+ struct perf_event *event)
{
u32 mode;
struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -756,13 +758,9 @@
struct coresight_platform_data *pdata = NULL;
struct etm_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
return -ENOMEM;
@@ -825,13 +823,13 @@
etm_init_trace_id(drvdata);
etm_set_default(&drvdata->config);
- desc->type = CORESIGHT_DEV_TYPE_SOURCE;
- desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
- desc->ops = &etm_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- desc->groups = coresight_etm_groups;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+ desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+ desc.ops = &etm_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_etm_groups;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev)) {
ret = PTR_ERR(drvdata->csdev);
goto err_arch_supported;
@@ -893,6 +891,11 @@
.mask = 0x0003ffff,
.data = "ETM 3.3",
},
+ { /* ETM 3.5 - Cortex-A5 */
+ .id = 0x0003b955,
+ .mask = 0x0003ffff,
+ .data = "ETM 3.5",
+ },
{ /* ETM 3.5 */
.id = 0x0003b956,
.mask = 0x0003ffff,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 7c84308..b9b1e9c 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -18,6 +18,7 @@
#include <linux/pm_runtime.h>
#include <linux/sysfs.h>
#include "coresight-etm4x.h"
+#include "coresight-priv.h"
static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude)
{
@@ -2039,15 +2040,42 @@
NULL,
};
-#define coresight_etm4x_simple_func(name, offset) \
- coresight_simple_func(struct etmv4_drvdata, name, offset)
+struct etmv4_reg {
+ void __iomem *addr;
+ u32 data;
+};
-coresight_etm4x_simple_func(trcoslsr, TRCOSLSR);
+static void do_smp_cross_read(void *data)
+{
+ struct etmv4_reg *reg = data;
+
+ reg->data = readl_relaxed(reg->addr);
+}
+
+static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+{
+ struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+ struct etmv4_reg reg;
+
+ reg.addr = drvdata->base + offset;
+ /*
+ * smp cross call ensures the CPU will be powered up before
+ * accessing the ETMv4 trace core registers
+ */
+ smp_call_function_single(drvdata->cpu, do_smp_cross_read, ®, 1);
+ return reg.data;
+}
+
+#define coresight_etm4x_simple_func(name, offset) \
+ coresight_simple_func(struct etmv4_drvdata, NULL, name, offset)
+
+#define coresight_etm4x_cross_read(name, offset) \
+ coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read, \
+ name, offset)
+
coresight_etm4x_simple_func(trcpdcr, TRCPDCR);
coresight_etm4x_simple_func(trcpdsr, TRCPDSR);
coresight_etm4x_simple_func(trclsr, TRCLSR);
-coresight_etm4x_simple_func(trcconfig, TRCCONFIGR);
-coresight_etm4x_simple_func(trctraceid, TRCTRACEIDR);
coresight_etm4x_simple_func(trcauthstatus, TRCAUTHSTATUS);
coresight_etm4x_simple_func(trcdevid, TRCDEVID);
coresight_etm4x_simple_func(trcdevtype, TRCDEVTYPE);
@@ -2055,6 +2083,9 @@
coresight_etm4x_simple_func(trcpidr1, TRCPIDR1);
coresight_etm4x_simple_func(trcpidr2, TRCPIDR2);
coresight_etm4x_simple_func(trcpidr3, TRCPIDR3);
+coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
+coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
+coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
static struct attribute *coresight_etmv4_mgmt_attrs[] = {
&dev_attr_trcoslsr.attr,
@@ -2073,19 +2104,19 @@
NULL,
};
-coresight_etm4x_simple_func(trcidr0, TRCIDR0);
-coresight_etm4x_simple_func(trcidr1, TRCIDR1);
-coresight_etm4x_simple_func(trcidr2, TRCIDR2);
-coresight_etm4x_simple_func(trcidr3, TRCIDR3);
-coresight_etm4x_simple_func(trcidr4, TRCIDR4);
-coresight_etm4x_simple_func(trcidr5, TRCIDR5);
+coresight_etm4x_cross_read(trcidr0, TRCIDR0);
+coresight_etm4x_cross_read(trcidr1, TRCIDR1);
+coresight_etm4x_cross_read(trcidr2, TRCIDR2);
+coresight_etm4x_cross_read(trcidr3, TRCIDR3);
+coresight_etm4x_cross_read(trcidr4, TRCIDR4);
+coresight_etm4x_cross_read(trcidr5, TRCIDR5);
/* trcidr[6,7] are reserved */
-coresight_etm4x_simple_func(trcidr8, TRCIDR8);
-coresight_etm4x_simple_func(trcidr9, TRCIDR9);
-coresight_etm4x_simple_func(trcidr10, TRCIDR10);
-coresight_etm4x_simple_func(trcidr11, TRCIDR11);
-coresight_etm4x_simple_func(trcidr12, TRCIDR12);
-coresight_etm4x_simple_func(trcidr13, TRCIDR13);
+coresight_etm4x_cross_read(trcidr8, TRCIDR8);
+coresight_etm4x_cross_read(trcidr9, TRCIDR9);
+coresight_etm4x_cross_read(trcidr10, TRCIDR10);
+coresight_etm4x_cross_read(trcidr11, TRCIDR11);
+coresight_etm4x_cross_read(trcidr12, TRCIDR12);
+coresight_etm4x_cross_read(trcidr13, TRCIDR13);
static struct attribute *coresight_etmv4_trcidr_attrs[] = {
&dev_attr_trcidr0.attr,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 1a5e0d1..4db8d6a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -33,7 +33,6 @@
#include <linux/uaccess.h>
#include <linux/perf_event.h>
#include <linux/pm_runtime.h>
-#include <linux/perf_event.h>
#include <asm/sections.h>
#include <asm/local.h>
@@ -46,7 +45,9 @@
/* The number of ETMv4 currently registered */
static int etm4_count;
static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
-static void etm4_set_default(struct etmv4_config *config);
+static void etm4_set_default_config(struct etmv4_config *config);
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+ struct perf_event *event);
static enum cpuhp_state hp_online;
@@ -79,22 +80,8 @@
static int etm4_trace_id(struct coresight_device *csdev)
{
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- unsigned long flags;
- int trace_id = -1;
- if (!local_read(&drvdata->mode))
- return drvdata->trcid;
-
- spin_lock_irqsave(&drvdata->spinlock, flags);
-
- CS_UNLOCK(drvdata->base);
- trace_id = readl_relaxed(drvdata->base + TRCTRACEIDR);
- trace_id &= ETM_TRACEID_MASK;
- CS_LOCK(drvdata->base);
-
- spin_unlock_irqrestore(&drvdata->spinlock, flags);
-
- return trace_id;
+ return drvdata->trcid;
}
static void etm4_enable_hw(void *info)
@@ -113,8 +100,7 @@
/* wait for TRCSTATR.IDLE to go up */
if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- TRCSTATR);
+ "timeout while waiting for Idle Trace Status\n");
writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
@@ -180,14 +166,20 @@
writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0);
writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1);
+ /*
+ * Request to keep the trace unit powered and also
+ * emulation of powerdown
+ */
+ writel_relaxed(readl_relaxed(drvdata->base + TRCPDCR) | TRCPDCR_PU,
+ drvdata->base + TRCPDCR);
+
/* Enable the trace unit */
writel_relaxed(1, drvdata->base + TRCPRGCTLR);
/* wait for TRCSTATR.IDLE to go back down to '0' */
if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- TRCSTATR);
+ "timeout while waiting for Idle Trace Status\n");
CS_LOCK(drvdata->base);
@@ -195,12 +187,16 @@
}
static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
- struct perf_event_attr *attr)
+ struct perf_event *event)
{
+ int ret = 0;
struct etmv4_config *config = &drvdata->config;
+ struct perf_event_attr *attr = &event->attr;
- if (!attr)
- return -EINVAL;
+ if (!attr) {
+ ret = -EINVAL;
+ goto out;
+ }
/* Clear configuration from previous run */
memset(config, 0, sizeof(struct etmv4_config));
@@ -212,14 +208,12 @@
config->mode = ETM_MODE_EXCL_USER;
/* Always start from the default config */
- etm4_set_default(config);
+ etm4_set_default_config(config);
- /*
- * By default the tracers are configured to trace the whole address
- * range. Narrow the field only if requested by user space.
- */
- if (config->mode)
- etm4_config_trace_mode(config);
+ /* Configure filters specified on the perf cmd line, if any. */
+ ret = etm4_set_event_filters(drvdata, event);
+ if (ret)
+ goto out;
/* Go from generic option to ETMv4 specifics */
if (attr->config & BIT(ETM_OPT_CYCACC))
@@ -227,23 +221,30 @@
if (attr->config & BIT(ETM_OPT_TS))
config->cfg |= ETMv4_MODE_TIMESTAMP;
- return 0;
+out:
+ return ret;
}
static int etm4_enable_perf(struct coresight_device *csdev,
- struct perf_event_attr *attr)
+ struct perf_event *event)
{
+ int ret = 0;
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
- return -EINVAL;
+ if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) {
+ ret = -EINVAL;
+ goto out;
+ }
/* Configure the tracer based on the session's specifics */
- etm4_parse_event_config(drvdata, attr);
+ ret = etm4_parse_event_config(drvdata, event);
+ if (ret)
+ goto out;
/* And enable it */
etm4_enable_hw(drvdata);
- return 0;
+out:
+ return ret;
}
static int etm4_enable_sysfs(struct coresight_device *csdev)
@@ -274,7 +275,7 @@
}
static int etm4_enable(struct coresight_device *csdev,
- struct perf_event_attr *attr, u32 mode)
+ struct perf_event *event, u32 mode)
{
int ret;
u32 val;
@@ -291,7 +292,7 @@
ret = etm4_enable_sysfs(csdev);
break;
case CS_MODE_PERF:
- ret = etm4_enable_perf(csdev, attr);
+ ret = etm4_enable_perf(csdev, event);
break;
default:
ret = -EINVAL;
@@ -311,6 +312,11 @@
CS_UNLOCK(drvdata->base);
+ /* power can be removed from the trace unit now */
+ control = readl_relaxed(drvdata->base + TRCPDCR);
+ control &= ~TRCPDCR_PU;
+ writel_relaxed(control, drvdata->base + TRCPDCR);
+
control = readl_relaxed(drvdata->base + TRCPRGCTLR);
/* EN, bit[0] Trace unit enable bit */
@@ -326,14 +332,28 @@
dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
}
-static int etm4_disable_perf(struct coresight_device *csdev)
+static int etm4_disable_perf(struct coresight_device *csdev,
+ struct perf_event *event)
{
+ u32 control;
+ struct etm_filters *filters = event->hw.addr_filters;
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
return -EINVAL;
etm4_disable_hw(drvdata);
+
+ /*
+ * Check if the start/stop logic was active when the unit was stopped.
+ * That way we can re-enable the start/stop logic when the process is
+ * scheduled again. Configuration of the start/stop logic happens in
+ * function etm4_set_event_filters().
+ */
+ control = readl_relaxed(drvdata->base + TRCVICTLR);
+ /* TRCVICTLR::SSSTATUS, bit[9] */
+ filters->ssstatus = (control & BIT(9));
+
return 0;
}
@@ -362,7 +382,8 @@
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
-static void etm4_disable(struct coresight_device *csdev)
+static void etm4_disable(struct coresight_device *csdev,
+ struct perf_event *event)
{
u32 mode;
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -381,7 +402,7 @@
etm4_disable_sysfs(csdev);
break;
case CS_MODE_PERF:
- etm4_disable_perf(csdev);
+ etm4_disable_perf(csdev, event);
break;
}
@@ -564,21 +585,8 @@
CS_LOCK(drvdata->base);
}
-static void etm4_set_default(struct etmv4_config *config)
+static void etm4_set_default_config(struct etmv4_config *config)
{
- if (WARN_ON_ONCE(!config))
- return;
-
- /*
- * Make default initialisation trace everything
- *
- * Select the "always true" resource selector on the
- * "Enablign Event" line and configure address range comparator
- * '0' to trace all the possible address range. From there
- * configure the "include/exclude" engine to include address
- * range comparator '0'.
- */
-
/* disable all events tracing */
config->eventctrl0 = 0x0;
config->eventctrl1 = 0x0;
@@ -594,6 +602,108 @@
/* TRCVICTLR::EVENT = 0x01, select the always on logic */
config->vinst_ctrl |= BIT(0);
+}
+
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+ u64 access_type = 0;
+
+ /*
+ * EXLEVEL_NS, bits[15:12]
+ * The Exception levels are:
+ * Bit[12] Exception level 0 - Application
+ * Bit[13] Exception level 1 - OS
+ * Bit[14] Exception level 2 - Hypervisor
+ * Bit[15] Never implemented
+ *
+ * Always stay away from hypervisor mode.
+ */
+ access_type = ETM_EXLEVEL_NS_HYP;
+
+ if (config->mode & ETM_MODE_EXCL_KERN)
+ access_type |= ETM_EXLEVEL_NS_OS;
+
+ if (config->mode & ETM_MODE_EXCL_USER)
+ access_type |= ETM_EXLEVEL_NS_APP;
+
+ /*
+ * EXLEVEL_S, bits[11:8], don't trace anything happening
+ * in secure state.
+ */
+ access_type |= (ETM_EXLEVEL_S_APP |
+ ETM_EXLEVEL_S_OS |
+ ETM_EXLEVEL_S_HYP);
+
+ return access_type;
+}
+
+static void etm4_set_comparator_filter(struct etmv4_config *config,
+ u64 start, u64 stop, int comparator)
+{
+ u64 access_type = etm4_get_access_type(config);
+
+ /* First half of default address comparator */
+ config->addr_val[comparator] = start;
+ config->addr_acc[comparator] = access_type;
+ config->addr_type[comparator] = ETM_ADDR_TYPE_RANGE;
+
+ /* Second half of default address comparator */
+ config->addr_val[comparator + 1] = stop;
+ config->addr_acc[comparator + 1] = access_type;
+ config->addr_type[comparator + 1] = ETM_ADDR_TYPE_RANGE;
+
+ /*
+ * Configure the ViewInst function to include this address range
+ * comparator.
+ *
+ * @comparator is divided by two since it is the index in the
+ * etmv4_config::addr_val array but register TRCVIIECTLR deals with
+ * address range comparator _pairs_.
+ *
+ * Therefore:
+ * index 0 -> compatator pair 0
+ * index 2 -> comparator pair 1
+ * index 4 -> comparator pair 2
+ * ...
+ * index 14 -> comparator pair 7
+ */
+ config->viiectlr |= BIT(comparator / 2);
+}
+
+static void etm4_set_start_stop_filter(struct etmv4_config *config,
+ u64 address, int comparator,
+ enum etm_addr_type type)
+{
+ int shift;
+ u64 access_type = etm4_get_access_type(config);
+
+ /* Configure the comparator */
+ config->addr_val[comparator] = address;
+ config->addr_acc[comparator] = access_type;
+ config->addr_type[comparator] = type;
+
+ /*
+ * Configure ViewInst Start-Stop control register.
+ * Addresses configured to start tracing go from bit 0 to n-1,
+ * while those configured to stop tracing from 16 to 16 + n-1.
+ */
+ shift = (type == ETM_ADDR_TYPE_START ? 0 : 16);
+ config->vissctlr |= BIT(shift + comparator);
+}
+
+static void etm4_set_default_filter(struct etmv4_config *config)
+{
+ u64 start, stop;
+
+ /*
+ * Configure address range comparator '0' to encompass all
+ * possible addresses.
+ */
+ start = 0x0;
+ stop = ~0x0;
+
+ etm4_set_comparator_filter(config, start, stop,
+ ETM_DEFAULT_ADDR_COMP);
/*
* TRCVICTLR::SSSTATUS == 1, the start-stop logic is
@@ -601,45 +711,158 @@
*/
config->vinst_ctrl |= BIT(9);
- /*
- * Configure address range comparator '0' to encompass all
- * possible addresses.
- */
-
- /* First half of default address comparator: start at address 0 */
- config->addr_val[ETM_DEFAULT_ADDR_COMP] = 0x0;
- /* trace instruction addresses */
- config->addr_acc[ETM_DEFAULT_ADDR_COMP] &= ~(BIT(0) | BIT(1));
- /* EXLEVEL_NS, bits[12:15], only trace application and kernel space */
- config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= ETM_EXLEVEL_NS_HYP;
- /* EXLEVEL_S, bits[11:8], don't trace anything in secure state */
- config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= (ETM_EXLEVEL_S_APP |
- ETM_EXLEVEL_S_OS |
- ETM_EXLEVEL_S_HYP);
- config->addr_type[ETM_DEFAULT_ADDR_COMP] = ETM_ADDR_TYPE_RANGE;
-
- /*
- * Second half of default address comparator: go all
- * the way to the top.
- */
- config->addr_val[ETM_DEFAULT_ADDR_COMP + 1] = ~0x0;
- /* trace instruction addresses */
- config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] &= ~(BIT(0) | BIT(1));
- /* Address comparator type must be equal for both halves */
- config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] =
- config->addr_acc[ETM_DEFAULT_ADDR_COMP];
- config->addr_type[ETM_DEFAULT_ADDR_COMP + 1] = ETM_ADDR_TYPE_RANGE;
-
- /*
- * Configure the ViewInst function to filter on address range
- * comparator '0'.
- */
- config->viiectlr = BIT(0);
-
- /* no start-stop filtering for ViewInst */
+ /* No start-stop filtering for ViewInst */
config->vissctlr = 0x0;
}
+static void etm4_set_default(struct etmv4_config *config)
+{
+ if (WARN_ON_ONCE(!config))
+ return;
+
+ /*
+ * Make default initialisation trace everything
+ *
+ * Select the "always true" resource selector on the
+ * "Enablign Event" line and configure address range comparator
+ * '0' to trace all the possible address range. From there
+ * configure the "include/exclude" engine to include address
+ * range comparator '0'.
+ */
+ etm4_set_default_config(config);
+ etm4_set_default_filter(config);
+}
+
+static int etm4_get_next_comparator(struct etmv4_drvdata *drvdata, u32 type)
+{
+ int nr_comparator, index = 0;
+ struct etmv4_config *config = &drvdata->config;
+
+ /*
+ * nr_addr_cmp holds the number of comparator _pair_, so time 2
+ * for the total number of comparators.
+ */
+ nr_comparator = drvdata->nr_addr_cmp * 2;
+
+ /* Go through the tally of comparators looking for a free one. */
+ while (index < nr_comparator) {
+ switch (type) {
+ case ETM_ADDR_TYPE_RANGE:
+ if (config->addr_type[index] == ETM_ADDR_TYPE_NONE &&
+ config->addr_type[index + 1] == ETM_ADDR_TYPE_NONE)
+ return index;
+
+ /* Address range comparators go in pairs */
+ index += 2;
+ break;
+ case ETM_ADDR_TYPE_START:
+ case ETM_ADDR_TYPE_STOP:
+ if (config->addr_type[index] == ETM_ADDR_TYPE_NONE)
+ return index;
+
+ /* Start/stop address can have odd indexes */
+ index += 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* If we are here all the comparators have been used. */
+ return -ENOSPC;
+}
+
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+ struct perf_event *event)
+{
+ int i, comparator, ret = 0;
+ u64 address;
+ struct etmv4_config *config = &drvdata->config;
+ struct etm_filters *filters = event->hw.addr_filters;
+
+ if (!filters)
+ goto default_filter;
+
+ /* Sync events with what Perf got */
+ perf_event_addr_filters_sync(event);
+
+ /*
+ * If there are no filters to deal with simply go ahead with
+ * the default filter, i.e the entire address range.
+ */
+ if (!filters->nr_filters)
+ goto default_filter;
+
+ for (i = 0; i < filters->nr_filters; i++) {
+ struct etm_filter *filter = &filters->etm_filter[i];
+ enum etm_addr_type type = filter->type;
+
+ /* See if a comparator is free. */
+ comparator = etm4_get_next_comparator(drvdata, type);
+ if (comparator < 0) {
+ ret = comparator;
+ goto out;
+ }
+
+ switch (type) {
+ case ETM_ADDR_TYPE_RANGE:
+ etm4_set_comparator_filter(config,
+ filter->start_addr,
+ filter->stop_addr,
+ comparator);
+ /*
+ * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+ * in the started state
+ */
+ config->vinst_ctrl |= BIT(9);
+
+ /* No start-stop filtering for ViewInst */
+ config->vissctlr = 0x0;
+ break;
+ case ETM_ADDR_TYPE_START:
+ case ETM_ADDR_TYPE_STOP:
+ /* Get the right start or stop address */
+ address = (type == ETM_ADDR_TYPE_START ?
+ filter->start_addr :
+ filter->stop_addr);
+
+ /* Configure comparator */
+ etm4_set_start_stop_filter(config, address,
+ comparator, type);
+
+ /*
+ * If filters::ssstatus == 1, trace acquisition was
+ * started but the process was yanked away before the
+ * the stop address was hit. As such the start/stop
+ * logic needs to be re-started so that tracing can
+ * resume where it left.
+ *
+ * The start/stop logic status when a process is
+ * scheduled out is checked in function
+ * etm4_disable_perf().
+ */
+ if (filters->ssstatus)
+ config->vinst_ctrl |= BIT(9);
+
+ /* No include/exclude filtering for ViewInst */
+ config->viiectlr = 0x0;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ goto out;
+
+
+default_filter:
+ etm4_set_default_filter(config);
+
+out:
+ return ret;
+}
+
void etm4_config_trace_mode(struct etmv4_config *config)
{
u32 addr_acc, mode;
@@ -727,13 +950,9 @@
struct coresight_platform_data *pdata = NULL;
struct etmv4_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
return -ENOMEM;
@@ -788,13 +1007,13 @@
etm4_init_trace_id(drvdata);
etm4_set_default(&drvdata->config);
- desc->type = CORESIGHT_DEV_TYPE_SOURCE;
- desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
- desc->ops = &etm4_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- desc->groups = coresight_etmv4_groups;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+ desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+ desc.ops = &etm4_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_etmv4_groups;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev)) {
ret = PTR_ERR(drvdata->csdev);
goto err_arch_supported;
@@ -826,12 +1045,12 @@
}
static struct amba_id etm4_ids[] = {
- { /* ETM 4.0 - Qualcomm */
- .id = 0x0003b95d,
- .mask = 0x0003ffff,
+ { /* ETM 4.0 - Cortex-A53 */
+ .id = 0x000bb95d,
+ .mask = 0x000fffff,
.data = "ETM 4.0",
},
- { /* ETM 4.0 - Juno board */
+ { /* ETM 4.0 - Cortex-A57 */
.id = 0x000bb95e,
.mask = 0x000fffff,
.data = "ETM 4.0",
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 5359c51..ba8d3f8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -183,6 +183,9 @@
#define TRCSTATR_IDLE_BIT 0
#define ETM_DEFAULT_ADDR_COMP 0
+/* PowerDown Control Register bits */
+#define TRCPDCR_PU BIT(3)
+
/* secure state access levels */
#define ETM_EXLEVEL_S_APP BIT(8)
#define ETM_EXLEVEL_S_OS BIT(9)
@@ -407,14 +410,6 @@
ETM_CTX_CTXID_VMID,
};
-enum etm_addr_type {
- ETM_ADDR_TYPE_NONE,
- ETM_ADDR_TYPE_SINGLE,
- ETM_ADDR_TYPE_RANGE,
- ETM_ADDR_TYPE_START,
- ETM_ADDR_TYPE_STOP,
-};
-
extern const struct attribute_group *coresight_etmv4_groups[];
void etm4_config_trace_mode(struct etmv4_config *config);
#endif
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 05df789..860fe6e 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -176,7 +176,7 @@
struct coresight_platform_data *pdata = NULL;
struct funnel_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
if (np) {
@@ -207,17 +207,13 @@
drvdata->base = base;
pm_runtime_put(&adev->dev);
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
- desc->type = CORESIGHT_DEV_TYPE_LINK;
- desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
- desc->ops = &funnel_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- desc->groups = coresight_funnel_groups;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_LINK;
+ desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+ desc.ops = &funnel_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_funnel_groups;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev))
return PTR_ERR(drvdata->csdev);
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index ad975c5..196a14b 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -16,6 +16,7 @@
#include <linux/bitops.h>
#include <linux/io.h>
#include <linux/coresight.h>
+#include <linux/pm_runtime.h>
/*
* Coresight management registers (0xf00-0xfcc)
@@ -37,16 +38,32 @@
#define ETM_MODE_EXCL_KERN BIT(30)
#define ETM_MODE_EXCL_USER BIT(31)
-#define coresight_simple_func(type, name, offset) \
+typedef u32 (*coresight_read_fn)(const struct device *, u32 offset);
+#define coresight_simple_func(type, func, name, offset) \
static ssize_t name##_show(struct device *_dev, \
struct device_attribute *attr, char *buf) \
{ \
type *drvdata = dev_get_drvdata(_dev->parent); \
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", \
- readl_relaxed(drvdata->base + offset)); \
+ coresight_read_fn fn = func; \
+ u32 val; \
+ pm_runtime_get_sync(_dev->parent); \
+ if (fn) \
+ val = fn(_dev->parent, offset); \
+ else \
+ val = readl_relaxed(drvdata->base + offset); \
+ pm_runtime_put_sync(_dev->parent); \
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", val); \
} \
static DEVICE_ATTR_RO(name)
+enum etm_addr_type {
+ ETM_ADDR_TYPE_NONE,
+ ETM_ADDR_TYPE_SINGLE,
+ ETM_ADDR_TYPE_RANGE,
+ ETM_ADDR_TYPE_START,
+ ETM_ADDR_TYPE_STOP,
+};
+
enum cs_mode {
CS_MODE_DISABLED,
CS_MODE_SYSFS,
diff --git a/drivers/hwtracing/coresight/coresight-replicator-qcom.c b/drivers/hwtracing/coresight/coresight-replicator-qcom.c
index 700f710..0a3d15f 100644
--- a/drivers/hwtracing/coresight/coresight-replicator-qcom.c
+++ b/drivers/hwtracing/coresight/coresight-replicator-qcom.c
@@ -102,7 +102,7 @@
struct resource *res = &adev->res;
struct coresight_platform_data *pdata = NULL;
struct replicator_state *drvdata;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
void __iomem *base;
@@ -134,16 +134,12 @@
dev_set_drvdata(dev, drvdata);
pm_runtime_put(&adev->dev);
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
- desc->type = CORESIGHT_DEV_TYPE_LINK;
- desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
- desc->ops = &replicator_cs_ops;
- desc->pdata = adev->dev.platform_data;
- desc->dev = &adev->dev;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_LINK;
+ desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+ desc.ops = &replicator_cs_ops;
+ desc.pdata = adev->dev.platform_data;
+ desc.dev = &adev->dev;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev))
return PTR_ERR(drvdata->csdev);
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index c6982e3..3756e71 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -69,7 +69,7 @@
struct device *dev = &pdev->dev;
struct coresight_platform_data *pdata = NULL;
struct replicator_drvdata *drvdata;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = pdev->dev.of_node;
if (np) {
@@ -95,18 +95,12 @@
pm_runtime_enable(&pdev->dev);
platform_set_drvdata(pdev, drvdata);
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto out_disable_pm;
- }
-
- desc->type = CORESIGHT_DEV_TYPE_LINK;
- desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
- desc->ops = &replicator_cs_ops;
- desc->pdata = pdev->dev.platform_data;
- desc->dev = &pdev->dev;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_LINK;
+ desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+ desc.ops = &replicator_cs_ops;
+ desc.pdata = pdev->dev.platform_data;
+ desc.dev = &pdev->dev;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev)) {
ret = PTR_ERR(drvdata->csdev);
goto out_disable_pm;
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 73be58a..49e0f1b 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -105,10 +105,12 @@
/**
* struct channel_space - central management entity for extended ports
* @base: memory mapped base address where channels start.
+ * @phys: physical base address of channel region.
* @guaraneed: is the channel delivery guaranteed.
*/
struct channel_space {
void __iomem *base;
+ phys_addr_t phys;
unsigned long *guaranteed;
};
@@ -196,7 +198,7 @@
}
static int stm_enable(struct coresight_device *csdev,
- struct perf_event_attr *attr, u32 mode)
+ struct perf_event *event, u32 mode)
{
u32 val;
struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -258,7 +260,8 @@
stm_hwevent_disable_hw(drvdata);
}
-static void stm_disable(struct coresight_device *csdev)
+static void stm_disable(struct coresight_device *csdev,
+ struct perf_event *event)
{
struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -353,7 +356,24 @@
if (!drvdata || !drvdata->csdev)
return;
- stm_disable(drvdata->csdev);
+ stm_disable(drvdata->csdev, NULL);
+}
+
+static phys_addr_t
+stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+ unsigned int channel, unsigned int nr_chans)
+{
+ struct stm_drvdata *drvdata = container_of(stm_data,
+ struct stm_drvdata, stm);
+ phys_addr_t addr;
+
+ addr = drvdata->chs.phys + channel * BYTES_PER_CHANNEL;
+
+ if (offset_in_page(addr) ||
+ offset_in_page(nr_chans * BYTES_PER_CHANNEL))
+ return 0;
+
+ return addr;
}
static long stm_generic_set_options(struct stm_data *stm_data,
@@ -616,7 +636,7 @@
static DEVICE_ATTR_RW(traceid);
#define coresight_stm_simple_func(name, offset) \
- coresight_simple_func(struct stm_drvdata, name, offset)
+ coresight_simple_func(struct stm_drvdata, NULL, name, offset)
coresight_stm_simple_func(tcsr, STMTCSR);
coresight_stm_simple_func(tsfreqr, STMTSFREQR);
@@ -761,7 +781,9 @@
drvdata->stm.sw_end = 1;
drvdata->stm.hw_override = true;
drvdata->stm.sw_nchannels = drvdata->numsp;
+ drvdata->stm.sw_mmiosz = BYTES_PER_CHANNEL;
drvdata->stm.packet = stm_generic_packet;
+ drvdata->stm.mmio_addr = stm_mmio_addr;
drvdata->stm.link = stm_generic_link;
drvdata->stm.unlink = stm_generic_unlink;
drvdata->stm.set_options = stm_generic_set_options;
@@ -778,7 +800,7 @@
struct resource *res = &adev->res;
struct resource ch_res;
size_t res_size, bitmap_size;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
if (np) {
@@ -808,6 +830,7 @@
ret = stm_get_resource_byname(np, "stm-stimulus-base", &ch_res);
if (ret)
return ret;
+ drvdata->chs.phys = ch_res.start;
base = devm_ioremap_resource(dev, &ch_res);
if (IS_ERR(base))
@@ -843,19 +866,13 @@
return -EPROBE_DEFER;
}
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto stm_unregister;
- }
-
- desc->type = CORESIGHT_DEV_TYPE_SOURCE;
- desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
- desc->ops = &stm_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- desc->groups = coresight_stm_groups;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+ desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+ desc.ops = &stm_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_stm_groups;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev)) {
ret = PTR_ERR(drvdata->csdev);
goto stm_unregister;
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 466af86..d6941ea 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -22,7 +22,7 @@
#include "coresight-priv.h"
#include "coresight-tmc.h"
-void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
{
CS_UNLOCK(drvdata->base);
@@ -48,6 +48,7 @@
int i;
bufp = drvdata->buf;
+ drvdata->len = 0;
while (1) {
for (i = 0; i < drvdata->memwidth; i++) {
read_data = readl_relaxed(drvdata->base + TMC_RRD);
@@ -55,6 +56,7 @@
return;
memcpy(bufp, &read_data, 4);
bufp += 4;
+ drvdata->len += 4;
}
}
}
@@ -166,7 +168,7 @@
spin_unlock_irqrestore(&drvdata->spinlock, flags);
/* Free memory outside the spinlock if need be */
- if (!used && buf)
+ if (!used)
kfree(buf);
if (!ret)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 688be9e..886ea83 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -20,7 +20,7 @@
#include "coresight-priv.h"
#include "coresight-tmc.h"
-void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
{
u32 axictl;
@@ -64,11 +64,17 @@
rwp = readl_relaxed(drvdata->base + TMC_RWP);
val = readl_relaxed(drvdata->base + TMC_STS);
- /* How much memory do we still have */
- if (val & BIT(0))
+ /*
+ * Adjust the buffer to point to the beginning of the trace data
+ * and update the available trace data.
+ */
+ if (val & TMC_STS_FULL) {
drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr;
- else
+ drvdata->len = drvdata->size;
+ } else {
drvdata->buf = drvdata->vaddr;
+ drvdata->len = rwp - drvdata->paddr;
+ }
}
static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 9e02ac9..d8517d2 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -38,8 +38,7 @@
if (coresight_timeout(drvdata->base,
TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- TMC_STS);
+ "timeout while waiting for TMC to be Ready\n");
}
}
@@ -56,8 +55,7 @@
if (coresight_timeout(drvdata->base,
TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
dev_err(drvdata->dev,
- "timeout observed when probing at offset %#x\n",
- TMC_FFCR);
+ "timeout while waiting for completion of Manual Flush\n");
}
tmc_wait_for_tmcready(drvdata);
@@ -140,8 +138,8 @@
struct tmc_drvdata, miscdev);
char *bufp = drvdata->buf + *ppos;
- if (*ppos + len > drvdata->size)
- len = drvdata->size - *ppos;
+ if (*ppos + len > drvdata->len)
+ len = drvdata->len - *ppos;
if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
if (bufp == (char *)(drvdata->vaddr + drvdata->size))
@@ -160,7 +158,7 @@
*ppos += len;
dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
- __func__, len, (int)(drvdata->size - *ppos));
+ __func__, len, (int)(drvdata->len - *ppos));
return len;
}
@@ -220,7 +218,7 @@
}
#define coresight_tmc_simple_func(name, offset) \
- coresight_simple_func(struct tmc_drvdata, name, offset)
+ coresight_simple_func(struct tmc_drvdata, NULL, name, offset)
coresight_tmc_simple_func(rsz, TMC_RSZ);
coresight_tmc_simple_func(sts, TMC_STS);
@@ -249,8 +247,8 @@
NULL,
};
-ssize_t trigger_cntr_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t trigger_cntr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
unsigned long val = drvdata->trigger_cntr;
@@ -304,27 +302,32 @@
struct coresight_platform_data *pdata = NULL;
struct tmc_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
if (np) {
pdata = of_get_coresight_platform_data(dev, np);
- if (IS_ERR(pdata))
- return PTR_ERR(pdata);
+ if (IS_ERR(pdata)) {
+ ret = PTR_ERR(pdata);
+ goto out;
+ }
adev->dev.platform_data = pdata;
}
+ ret = -ENOMEM;
drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
- return -ENOMEM;
+ goto out;
drvdata->dev = &adev->dev;
dev_set_drvdata(dev, drvdata);
/* Validity for the resource is already checked by the AMBA core */
base = devm_ioremap_resource(dev, res);
- if (IS_ERR(base))
- return PTR_ERR(base);
+ if (IS_ERR(base)) {
+ ret = PTR_ERR(base);
+ goto out;
+ }
drvdata->base = base;
@@ -347,33 +350,28 @@
pm_runtime_put(&adev->dev);
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto err_devm_kzalloc;
- }
-
- desc->pdata = pdata;
- desc->dev = dev;
- desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
- desc->groups = coresight_tmc_groups;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ desc.groups = coresight_tmc_groups;
if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
- desc->type = CORESIGHT_DEV_TYPE_SINK;
- desc->ops = &tmc_etb_cs_ops;
+ desc.type = CORESIGHT_DEV_TYPE_SINK;
+ desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+ desc.ops = &tmc_etb_cs_ops;
} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
- desc->type = CORESIGHT_DEV_TYPE_SINK;
- desc->ops = &tmc_etr_cs_ops;
+ desc.type = CORESIGHT_DEV_TYPE_SINK;
+ desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+ desc.ops = &tmc_etr_cs_ops;
} else {
- desc->type = CORESIGHT_DEV_TYPE_LINKSINK;
- desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
- desc->ops = &tmc_etf_cs_ops;
+ desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
+ desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
+ desc.ops = &tmc_etf_cs_ops;
}
- drvdata->csdev = coresight_register(desc);
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev)) {
ret = PTR_ERR(drvdata->csdev);
- goto err_devm_kzalloc;
+ goto out;
}
drvdata->miscdev.name = pdata->name;
@@ -381,16 +379,8 @@
drvdata->miscdev.fops = &tmc_fops;
ret = misc_register(&drvdata->miscdev);
if (ret)
- goto err_misc_register;
-
- return 0;
-
-err_misc_register:
- coresight_unregister(drvdata->csdev);
-err_devm_kzalloc:
- if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
- dma_free_coherent(dev, drvdata->size,
- drvdata->vaddr, drvdata->paddr);
+ coresight_unregister(drvdata->csdev);
+out:
return ret;
}
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 5c5fe2a..44b3ae3 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -98,7 +98,8 @@
* @buf: area of memory where trace data get sent.
* @paddr: DMA start location in RAM.
* @vaddr: virtual representation of @paddr.
- * @size: @buf size.
+ * @size: trace buffer size.
+ * @len: size of the available trace.
* @mode: how this TMC is being used.
* @config_type: TMC variant, must be of type @tmc_config_type.
* @memwidth: width of the memory interface databus, in bytes.
@@ -115,6 +116,7 @@
dma_addr_t paddr;
void __iomem *vaddr;
u32 size;
+ u32 len;
local_t mode;
enum tmc_config_type config_type;
enum tmc_mem_intf_width memwidth;
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 4e471e2..0673baf 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -119,7 +119,7 @@
struct coresight_platform_data *pdata = NULL;
struct tpiu_drvdata *drvdata;
struct resource *res = &adev->res;
- struct coresight_desc *desc;
+ struct coresight_desc desc = { 0 };
struct device_node *np = adev->dev.of_node;
if (np) {
@@ -154,16 +154,12 @@
pm_runtime_put(&adev->dev);
- desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return -ENOMEM;
-
- desc->type = CORESIGHT_DEV_TYPE_SINK;
- desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
- desc->ops = &tpiu_cs_ops;
- desc->pdata = pdata;
- desc->dev = dev;
- drvdata->csdev = coresight_register(desc);
+ desc.type = CORESIGHT_DEV_TYPE_SINK;
+ desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
+ desc.ops = &tpiu_cs_ops;
+ desc.pdata = pdata;
+ desc.dev = dev;
+ drvdata->csdev = coresight_register(&desc);
if (IS_ERR(drvdata->csdev))
return PTR_ERR(drvdata->csdev);
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index d08d1ab..7bf00a0 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -257,7 +257,7 @@
{
if (atomic_dec_return(csdev->refcnt) == 0) {
if (source_ops(csdev)->disable) {
- source_ops(csdev)->disable(csdev);
+ source_ops(csdev)->disable(csdev, NULL);
csdev->enable = false;
}
}
@@ -429,7 +429,7 @@
path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
if (!path)
- return NULL;
+ return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(path);
@@ -725,7 +725,8 @@
/* We have found at least one orphan connection */
if (conn->child_dev == NULL) {
/* Does it match this newly added device? */
- if (!strcmp(dev_name(&csdev->dev), conn->child_name)) {
+ if (conn->child_name &&
+ !strcmp(dev_name(&csdev->dev), conn->child_name)) {
conn->child_dev = csdev;
} else {
/* This component still has an orphan */
@@ -893,7 +894,7 @@
int nr_refcnts = 1;
atomic_t *refcnts = NULL;
struct coresight_device *csdev;
- struct coresight_connection *conns;
+ struct coresight_connection *conns = NULL;
csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
if (!csdev) {
@@ -921,16 +922,20 @@
csdev->nr_inport = desc->pdata->nr_inport;
csdev->nr_outport = desc->pdata->nr_outport;
- conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
- if (!conns) {
- ret = -ENOMEM;
- goto err_kzalloc_conns;
- }
- for (i = 0; i < csdev->nr_outport; i++) {
- conns[i].outport = desc->pdata->outports[i];
- conns[i].child_name = desc->pdata->child_names[i];
- conns[i].child_port = desc->pdata->child_ports[i];
+ /* Initialise connections if there is at least one outport */
+ if (csdev->nr_outport) {
+ conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
+ if (!conns) {
+ ret = -ENOMEM;
+ goto err_kzalloc_conns;
+ }
+
+ for (i = 0; i < csdev->nr_outport; i++) {
+ conns[i].outport = desc->pdata->outports[i];
+ conns[i].child_name = desc->pdata->child_names[i];
+ conns[i].child_port = desc->pdata->child_ports[i];
+ }
}
csdev->conns = conns;
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index b68da18..629e031 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -166,7 +166,7 @@
rdev = of_coresight_get_endpoint_device(rparent);
if (!rdev)
- continue;
+ return ERR_PTR(-EPROBE_DEFER);
pdata->child_names[i] = dev_name(rdev);
pdata->child_ports[i] = rendpoint.id;
@@ -184,6 +184,7 @@
break;
}
}
+ of_node_put(dn);
return pdata;
}
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 137125b..5ce71ce 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -773,13 +773,6 @@
/* Set the number of I2C channel instance */
adap_info->ch_num = id->driver_data;
- ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
- KBUILD_MODNAME, adap_info);
- if (ret) {
- pch_pci_err(pdev, "request_irq FAILED\n");
- goto err_request_irq;
- }
-
for (i = 0; i < adap_info->ch_num; i++) {
pch_adap = &adap_info->pch_data[i].pch_adapter;
adap_info->pch_i2c_suspended = false;
@@ -797,6 +790,17 @@
pch_adap->dev.of_node = pdev->dev.of_node;
pch_adap->dev.parent = &pdev->dev;
+ }
+
+ ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
+ KBUILD_MODNAME, adap_info);
+ if (ret) {
+ pch_pci_err(pdev, "request_irq FAILED\n");
+ goto err_request_irq;
+ }
+
+ for (i = 0; i < adap_info->ch_num; i++) {
+ pch_adap = &adap_info->pch_data[i].pch_adapter;
pch_i2c_init(&adap_info->pch_data[i]);
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 5ef9b73..26298af 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1486,7 +1486,9 @@
priv->features |= FEATURE_IRQ;
priv->features |= FEATURE_SMBUS_PEC;
priv->features |= FEATURE_BLOCK_BUFFER;
- priv->features |= FEATURE_TCO;
+ /* If we have ACPI based watchdog use that instead */
+ if (!acpi_has_watchdog())
+ priv->features |= FEATURE_TCO;
priv->features |= FEATURE_HOST_NOTIFY;
break;
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 501bd15..a8497cf 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1599,7 +1599,8 @@
#ifdef CONFIG_PM_SLEEP
static int qup_i2c_suspend(struct device *device)
{
- qup_i2c_pm_suspend_runtime(device);
+ if (!pm_runtime_suspended(device))
+ return qup_i2c_pm_suspend_runtime(device);
return 0;
}
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index 528e755..3278ebf 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -164,7 +164,7 @@
/* Only select the channel if its different from the last channel */
if (data->last_chan != regval) {
ret = pca954x_reg_write(muxc->parent, client, regval);
- data->last_chan = regval;
+ data->last_chan = ret ? 0 : regval;
}
return ret;
diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig
index d130cdc..7fa65ab 100644
--- a/drivers/iio/pressure/Kconfig
+++ b/drivers/iio/pressure/Kconfig
@@ -8,8 +8,6 @@
config BMP280
tristate "Bosch Sensortec BMP180/BMP280 pressure sensor I2C driver"
depends on (I2C || SPI_MASTER)
- depends on !(BMP085_I2C=y || BMP085_I2C=m)
- depends on !(BMP085_SPI=y || BMP085_SPI=m)
select REGMAP
select BMP280_I2C if (I2C)
select BMP280_SPI if (SPI_MASTER)
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 5d11fea..f3135ae 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -950,6 +950,12 @@
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT_MASK(EV_ABS) },
+ .absbit = { BIT_MASK(ABS_Z) },
+ },
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .evbit = { BIT_MASK(EV_ABS) },
.absbit = { BIT_MASK(ABS_WHEEL) },
},
{
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 65ebbd1..92595b9 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -1013,23 +1013,12 @@
.minor = UINPUT_MINOR,
.name = UINPUT_NAME,
};
+module_misc_device(uinput_misc);
+
MODULE_ALIAS_MISCDEV(UINPUT_MINOR);
MODULE_ALIAS("devname:" UINPUT_NAME);
-static int __init uinput_init(void)
-{
- return misc_register(&uinput_misc);
-}
-
-static void __exit uinput_exit(void)
-{
- misc_deregister(&uinput_misc);
-}
-
MODULE_AUTHOR("Aristeu Sergio Rozanski Filho");
MODULE_DESCRIPTION("User level driver support for input subsystem");
MODULE_LICENSE("GPL");
MODULE_VERSION("0.3");
-
-module_init(uinput_init);
-module_exit(uinput_exit);
diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index b2744a6..f502c84 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -390,9 +390,10 @@
data->max_fingers = 5; /* Most devices handle up-to 5 fingers */
}
- error = device_property_read_string(dev, "touchscreen-fw-name", &str);
+ error = device_property_read_string(dev, "firmware-name", &str);
if (!error)
- snprintf(data->fw_name, sizeof(data->fw_name), "%s", str);
+ snprintf(data->fw_name, sizeof(data->fw_name),
+ "silead/%s", str);
else
dev_dbg(dev, "Firmware file name read error. Using default.");
}
@@ -410,14 +411,14 @@
if (!acpi_id)
return -ENODEV;
- snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
- acpi_id->id);
+ snprintf(data->fw_name, sizeof(data->fw_name),
+ "silead/%s.fw", acpi_id->id);
for (i = 0; i < strlen(data->fw_name); i++)
data->fw_name[i] = tolower(data->fw_name[i]);
} else {
- snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
- id->name);
+ snprintf(data->fw_name, sizeof(data->fw_name),
+ "silead/%s.fw", id->name);
}
return 0;
@@ -426,7 +427,8 @@
static int silead_ts_set_default_fw_name(struct silead_ts_data *data,
const struct i2c_device_id *id)
{
- snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name);
+ snprintf(data->fw_name, sizeof(data->fw_name),
+ "silead/%s.fw", id->name);
return 0;
}
#endif
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a..4025291 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -940,15 +940,13 @@
* Writes the command to the IOMMUs command buffer and informs the
* hardware about the new command.
*/
-static int iommu_queue_command_sync(struct amd_iommu *iommu,
- struct iommu_cmd *cmd,
- bool sync)
+static int __iommu_queue_command_sync(struct amd_iommu *iommu,
+ struct iommu_cmd *cmd,
+ bool sync)
{
u32 left, tail, head, next_tail;
- unsigned long flags;
again:
- spin_lock_irqsave(&iommu->lock, flags);
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
@@ -957,15 +955,14 @@
if (left <= 2) {
struct iommu_cmd sync_cmd;
- volatile u64 sem = 0;
int ret;
- build_completion_wait(&sync_cmd, (u64)&sem);
+ iommu->cmd_sem = 0;
+
+ build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
copy_cmd_to_buffer(iommu, &sync_cmd, tail);
- spin_unlock_irqrestore(&iommu->lock, flags);
-
- if ((ret = wait_on_sem(&sem)) != 0)
+ if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
return ret;
goto again;
@@ -976,9 +973,21 @@
/* We need to sync now to make sure all commands are processed */
iommu->need_sync = sync;
+ return 0;
+}
+
+static int iommu_queue_command_sync(struct amd_iommu *iommu,
+ struct iommu_cmd *cmd,
+ bool sync)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ ret = __iommu_queue_command_sync(iommu, cmd, sync);
spin_unlock_irqrestore(&iommu->lock, flags);
- return 0;
+ return ret;
}
static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
@@ -993,19 +1002,29 @@
static int iommu_completion_wait(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
- volatile u64 sem = 0;
+ unsigned long flags;
int ret;
if (!iommu->need_sync)
return 0;
- build_completion_wait(&cmd, (u64)&sem);
- ret = iommu_queue_command_sync(iommu, &cmd, false);
+ build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ iommu->cmd_sem = 0;
+
+ ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
- return ret;
+ goto out_unlock;
- return wait_on_sem(&sem);
+ ret = wait_on_sem(&iommu->cmd_sem);
+
+out_unlock:
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return ret;
}
static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e38..9652848 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -524,6 +524,8 @@
struct irq_domain *ir_domain;
struct irq_domain *msi_domain;
#endif
+
+ volatile u64 __aligned(8) cmd_sem;
};
#define ACPIHID_UID_LEN 256
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7f87289..82b0b5d 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -39,6 +39,7 @@
bool
depends on PCI
depends on PCI_MSI
+ select ACPI_IORT if ACPI
config ARM_NVIC
bool
@@ -156,6 +157,13 @@
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
+config JCORE_AIC
+ bool "J-Core integrated AIC"
+ depends on OF && (SUPERH || COMPILE_TEST)
+ select IRQ_DOMAIN
+ help
+ Support for the J-Core integrated AIC.
+
config RENESAS_INTC_IRQPIN
bool
select IRQ_DOMAIN
@@ -251,6 +259,9 @@
config MVEBU_ODMI
bool
+config MVEBU_PIC
+ bool
+
config LS_SCFG_MSI
def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
depends on PCI && PCI_MSI
@@ -264,3 +275,7 @@
select IRQ_DOMAIN
help
Support the EZchip NPS400 global interrupt controller
+
+config STM32_EXTI
+ bool
+ select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 4c203b6..b372e79 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -40,6 +40,7 @@
obj-$(CONFIG_IMGPDC_IRQ) += irq-imgpdc.o
obj-$(CONFIG_IRQ_MIPS_CPU) += irq-mips-cpu.o
obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o
+obj-$(CONFIG_JCORE_AIC) += irq-jcore-aic.o
obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o
obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
@@ -68,6 +69,8 @@
obj-$(CONFIG_IMX_GPCV2) += irq-imx-gpcv2.o
obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o
obj-$(CONFIG_MVEBU_ODMI) += irq-mvebu-odmi.o
+obj-$(CONFIG_MVEBU_PIC) += irq-mvebu-pic.o
obj-$(CONFIG_LS_SCFG_MSI) += irq-ls-scfg-msi.o
obj-$(CONFIG_EZNPS_GIC) += irq-eznps.o
obj-$(CONFIG_ARCH_ASPEED) += irq-aspeed-vic.o
+obj-$(CONFIG_STM32_EXTI) += irq-stm32-exti.o
diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
index 4cbffba..ecafd29 100644
--- a/drivers/irqchip/irq-gic-pm.c
+++ b/drivers/irqchip/irq-gic-pm.c
@@ -64,7 +64,6 @@
static int gic_get_clocks(struct device *dev, const struct gic_clk_data *data)
{
- struct clk *clk;
unsigned int i;
int ret;
@@ -76,28 +75,16 @@
return ret;
for (i = 0; i < data->num_clocks; i++) {
- clk = of_clk_get_by_name(dev->of_node, data->clocks[i]);
- if (IS_ERR(clk)) {
- dev_err(dev, "failed to get clock %s\n",
- data->clocks[i]);
- ret = PTR_ERR(clk);
- goto error;
- }
-
- ret = pm_clk_add_clk(dev, clk);
+ ret = of_pm_clk_add_clk(dev, data->clocks[i]);
if (ret) {
- dev_err(dev, "failed to add clock at index %d\n", i);
- clk_put(clk);
- goto error;
+ dev_err(dev, "failed to add clock %s\n",
+ data->clocks[i]);
+ pm_clk_destroy(dev);
+ return ret;
}
}
return 0;
-
-error:
- pm_clk_destroy(dev);
-
- return ret;
}
static int gic_probe(struct platform_device *pdev)
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index aee60ed..aee1c60 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/acpi_iort.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_irq.h>
@@ -106,34 +107,91 @@
{},
};
-static int __init its_pci_msi_init(void)
+static int __init its_pci_msi_init_one(struct fwnode_handle *handle,
+ const char *name)
+{
+ struct irq_domain *parent;
+
+ parent = irq_find_matching_fwnode(handle, DOMAIN_BUS_NEXUS);
+ if (!parent || !msi_get_domain_info(parent)) {
+ pr_err("%s: Unable to locate ITS domain\n", name);
+ return -ENXIO;
+ }
+
+ if (!pci_msi_create_irq_domain(handle, &its_pci_msi_domain_info,
+ parent)) {
+ pr_err("%s: Unable to create PCI domain\n", name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int __init its_pci_of_msi_init(void)
{
struct device_node *np;
- struct irq_domain *parent;
for (np = of_find_matching_node(NULL, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
if (!of_property_read_bool(np, "msi-controller"))
continue;
- parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
- if (!parent || !msi_get_domain_info(parent)) {
- pr_err("%s: unable to locate ITS domain\n",
- np->full_name);
+ if (its_pci_msi_init_one(of_node_to_fwnode(np), np->full_name))
continue;
- }
-
- if (!pci_msi_create_irq_domain(of_node_to_fwnode(np),
- &its_pci_msi_domain_info,
- parent)) {
- pr_err("%s: unable to create PCI domain\n",
- np->full_name);
- continue;
- }
pr_info("PCI/MSI: %s domain created\n", np->full_name);
}
return 0;
}
+
+#ifdef CONFIG_ACPI
+
+static int __init
+its_pci_msi_parse_madt(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_madt_generic_translator *its_entry;
+ struct fwnode_handle *dom_handle;
+ const char *node_name;
+ int err = -ENXIO;
+
+ its_entry = (struct acpi_madt_generic_translator *)header;
+ node_name = kasprintf(GFP_KERNEL, "ITS@0x%lx",
+ (long)its_entry->base_address);
+ dom_handle = iort_find_domain_token(its_entry->translation_id);
+ if (!dom_handle) {
+ pr_err("%s: Unable to locate ITS domain handle\n", node_name);
+ goto out;
+ }
+
+ err = its_pci_msi_init_one(dom_handle, node_name);
+ if (!err)
+ pr_info("PCI/MSI: %s domain created\n", node_name);
+
+out:
+ kfree(node_name);
+ return err;
+}
+
+static int __init its_pci_acpi_msi_init(void)
+{
+ acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+ its_pci_msi_parse_madt, 0);
+ return 0;
+}
+#else
+static int __init its_pci_acpi_msi_init(void)
+{
+ return 0;
+}
+#endif
+
+static int __init its_pci_msi_init(void)
+{
+ its_pci_of_msi_init();
+ its_pci_acpi_msi_init();
+
+ return 0;
+}
early_initcall(its_pci_msi_init);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 36b9c28..35c851c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -15,10 +15,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/acpi_iort.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/msi.h>
@@ -75,7 +78,7 @@
raw_spinlock_t lock;
struct list_head entry;
void __iomem *base;
- unsigned long phys_base;
+ phys_addr_t phys_base;
struct its_cmd_block *cmd_base;
struct its_cmd_block *cmd_write;
struct its_baser tables[GITS_BASER_NR_REGS];
@@ -115,6 +118,7 @@
static LIST_HEAD(its_nodes);
static DEFINE_SPINLOCK(its_lock);
static struct rdists *gic_rdists;
+static struct irq_domain *its_parent;
#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
@@ -1437,6 +1441,11 @@
fwspec.param[0] = GIC_IRQ_TYPE_LPI;
fwspec.param[1] = hwirq;
fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+ } else if (is_fwnode_irqchip(domain->parent->fwnode)) {
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 2;
+ fwspec.param[0] = hwirq;
+ fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
} else {
return -EINVAL;
}
@@ -1614,44 +1623,59 @@
gic_enable_quirks(iidr, its_quirks, its);
}
-static int __init its_probe(struct device_node *node,
- struct irq_domain *parent)
+static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
{
- struct resource res;
+ struct irq_domain *inner_domain;
+ struct msi_domain_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ inner_domain = irq_domain_create_tree(handle, &its_domain_ops, its);
+ if (!inner_domain) {
+ kfree(info);
+ return -ENOMEM;
+ }
+
+ inner_domain->parent = its_parent;
+ inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+ info->ops = &its_msi_domain_ops;
+ info->data = its;
+ inner_domain->host_data = info;
+
+ return 0;
+}
+
+static int __init its_probe_one(struct resource *res,
+ struct fwnode_handle *handle, int numa_node)
+{
struct its_node *its;
void __iomem *its_base;
- struct irq_domain *inner_domain;
u32 val;
u64 baser, tmp;
int err;
- err = of_address_to_resource(node, 0, &res);
- if (err) {
- pr_warn("%s: no regs?\n", node->full_name);
- return -ENXIO;
- }
-
- its_base = ioremap(res.start, resource_size(&res));
+ its_base = ioremap(res->start, resource_size(res));
if (!its_base) {
- pr_warn("%s: unable to map registers\n", node->full_name);
+ pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
return -ENOMEM;
}
val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK;
if (val != 0x30 && val != 0x40) {
- pr_warn("%s: no ITS detected, giving up\n", node->full_name);
+ pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start);
err = -ENODEV;
goto out_unmap;
}
err = its_force_quiescent(its_base);
if (err) {
- pr_warn("%s: failed to quiesce, giving up\n",
- node->full_name);
+ pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start);
goto out_unmap;
}
- pr_info("ITS: %s\n", node->full_name);
+ pr_info("ITS %pR\n", res);
its = kzalloc(sizeof(*its), GFP_KERNEL);
if (!its) {
@@ -1663,9 +1687,9 @@
INIT_LIST_HEAD(&its->entry);
INIT_LIST_HEAD(&its->its_device_list);
its->base = its_base;
- its->phys_base = res.start;
+ its->phys_base = res->start;
its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
- its->numa_node = of_node_to_nid(node);
+ its->numa_node = numa_node;
its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL);
if (!its->cmd_base) {
@@ -1712,28 +1736,9 @@
writeq_relaxed(0, its->base + GITS_CWRITER);
writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
- if (of_property_read_bool(node, "msi-controller")) {
- struct msi_domain_info *info;
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info) {
- err = -ENOMEM;
- goto out_free_tables;
- }
-
- inner_domain = irq_domain_add_tree(node, &its_domain_ops, its);
- if (!inner_domain) {
- err = -ENOMEM;
- kfree(info);
- goto out_free_tables;
- }
-
- inner_domain->parent = parent;
- inner_domain->bus_token = DOMAIN_BUS_NEXUS;
- info->ops = &its_msi_domain_ops;
- info->data = its;
- inner_domain->host_data = info;
- }
+ err = its_init_domain(handle, its);
+ if (err)
+ goto out_free_tables;
spin_lock(&its_lock);
list_add(&its->entry, &its_nodes);
@@ -1749,7 +1754,7 @@
kfree(its);
out_unmap:
iounmap(its_base);
- pr_err("ITS: failed probing %s (%d)\n", node->full_name, err);
+ pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err);
return err;
}
@@ -1777,15 +1782,91 @@
{},
};
-int __init its_init(struct device_node *node, struct rdists *rdists,
- struct irq_domain *parent_domain)
+static int __init its_of_probe(struct device_node *node)
{
struct device_node *np;
+ struct resource res;
for (np = of_find_matching_node(node, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
- its_probe(np, parent_domain);
+ if (!of_property_read_bool(np, "msi-controller")) {
+ pr_warn("%s: no msi-controller property, ITS ignored\n",
+ np->full_name);
+ continue;
+ }
+
+ if (of_address_to_resource(np, 0, &res)) {
+ pr_warn("%s: no regs?\n", np->full_name);
+ continue;
+ }
+
+ its_probe_one(&res, &np->fwnode, of_node_to_nid(np));
}
+ return 0;
+}
+
+#ifdef CONFIG_ACPI
+
+#define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K)
+
+static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_madt_generic_translator *its_entry;
+ struct fwnode_handle *dom_handle;
+ struct resource res;
+ int err;
+
+ its_entry = (struct acpi_madt_generic_translator *)header;
+ memset(&res, 0, sizeof(res));
+ res.start = its_entry->base_address;
+ res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1;
+ res.flags = IORESOURCE_MEM;
+
+ dom_handle = irq_domain_alloc_fwnode((void *)its_entry->base_address);
+ if (!dom_handle) {
+ pr_err("ITS@%pa: Unable to allocate GICv3 ITS domain token\n",
+ &res.start);
+ return -ENOMEM;
+ }
+
+ err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+ if (err) {
+ pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n",
+ &res.start, its_entry->translation_id);
+ goto dom_err;
+ }
+
+ err = its_probe_one(&res, dom_handle, NUMA_NO_NODE);
+ if (!err)
+ return 0;
+
+ iort_deregister_domain_token(its_entry->translation_id);
+dom_err:
+ irq_domain_free_fwnode(dom_handle);
+ return err;
+}
+
+static void __init its_acpi_probe(void)
+{
+ acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+ gic_acpi_parse_madt_its, 0);
+}
+#else
+static void __init its_acpi_probe(void) { }
+#endif
+
+int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
+ struct irq_domain *parent_domain)
+{
+ struct device_node *of_node;
+
+ its_parent = parent_domain;
+ of_node = to_of_node(handle);
+ if (of_node)
+ its_of_probe(of_node);
+ else
+ its_acpi_probe();
if (list_empty(&its_nodes)) {
pr_warn("ITS: No ITS available, not enabling LPIs\n");
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index ede5672..9b81bd8 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -495,6 +495,14 @@
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);
+ /*
+ * Some firmwares hand over to the kernel with the BPR changed from
+ * its reset value (and with a value large enough to prevent
+ * any pre-emptive interrupts from working at all). Writing a zero
+ * to BPR restores is reset value.
+ */
+ gic_write_bpr1(0);
+
if (static_key_true(&supports_deactivate)) {
/* EOI drops priority only (mode 1) */
gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
@@ -548,7 +556,7 @@
static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
unsigned long cluster_id)
{
- int cpu = *base_cpu;
+ int next_cpu, cpu = *base_cpu;
unsigned long mpidr = cpu_logical_map(cpu);
u16 tlist = 0;
@@ -562,9 +570,10 @@
tlist |= 1 << (mpidr & 0xf);
- cpu = cpumask_next(cpu, mask);
- if (cpu >= nr_cpu_ids)
+ next_cpu = cpumask_next(cpu, mask);
+ if (next_cpu >= nr_cpu_ids)
goto out;
+ cpu = next_cpu;
mpidr = cpu_logical_map(cpu);
@@ -910,7 +919,6 @@
u64 redist_stride,
struct fwnode_handle *handle)
{
- struct device_node *node;
u32 typer;
int gic_irqs;
int err;
@@ -951,10 +959,8 @@
set_handle_irq(gic_handle_irq);
- node = to_of_node(handle);
- if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis() &&
- node) /* Temp hack to prevent ITS init for ACPI */
- its_init(node, &gic_data.rdists, gic_data.domain);
+ if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
+ its_init(handle, &gic_data.rdists, gic_data.domain);
gic_smp_init();
gic_dist_init();
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 390fac5..58e5b4e 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -91,7 +91,27 @@
#endif
};
-static DEFINE_RAW_SPINLOCK(irq_controller_lock);
+#ifdef CONFIG_BL_SWITCHER
+
+static DEFINE_RAW_SPINLOCK(cpu_map_lock);
+
+#define gic_lock_irqsave(f) \
+ raw_spin_lock_irqsave(&cpu_map_lock, (f))
+#define gic_unlock_irqrestore(f) \
+ raw_spin_unlock_irqrestore(&cpu_map_lock, (f))
+
+#define gic_lock() raw_spin_lock(&cpu_map_lock)
+#define gic_unlock() raw_spin_unlock(&cpu_map_lock)
+
+#else
+
+#define gic_lock_irqsave(f) do { (void)(f); } while(0)
+#define gic_unlock_irqrestore(f) do { (void)(f); } while(0)
+
+#define gic_lock() do { } while(0)
+#define gic_unlock() do { } while(0)
+
+#endif
/*
* The GIC mapping of CPU interfaces does not necessarily match
@@ -317,12 +337,12 @@
if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
return -EINVAL;
- raw_spin_lock_irqsave(&irq_controller_lock, flags);
+ gic_lock_irqsave(flags);
mask = 0xff << shift;
bit = gic_cpu_map[cpu] << shift;
val = readl_relaxed(reg) & ~mask;
writel_relaxed(val | bit, reg);
- raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+ gic_unlock_irqrestore(flags);
return IRQ_SET_MASK_OK_DONE;
}
@@ -374,9 +394,7 @@
chained_irq_enter(chip, desc);
- raw_spin_lock(&irq_controller_lock);
status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK);
- raw_spin_unlock(&irq_controller_lock);
gic_irq = (status & GICC_IAR_INT_ID_MASK);
if (gic_irq == GICC_INT_SPURIOUS)
@@ -776,7 +794,7 @@
return;
}
- raw_spin_lock_irqsave(&irq_controller_lock, flags);
+ gic_lock_irqsave(flags);
/* Convert our logical CPU mask into a physical one. */
for_each_cpu(cpu, mask)
@@ -791,7 +809,7 @@
/* this always happens on GIC0 */
writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
- raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+ gic_unlock_irqrestore(flags);
}
#endif
@@ -859,7 +877,7 @@
cur_target_mask = 0x01010101 << cur_cpu_id;
ror_val = (cur_cpu_id - new_cpu_id) & 31;
- raw_spin_lock(&irq_controller_lock);
+ gic_lock();
/* Update the target interface for this logical CPU */
gic_cpu_map[cpu] = 1 << new_cpu_id;
@@ -879,7 +897,7 @@
}
}
- raw_spin_unlock(&irq_controller_lock);
+ gic_unlock();
/*
* Now let's migrate and clear any potential SGIs that might be
@@ -921,7 +939,7 @@
return gic_dist_physaddr + GIC_DIST_SOFTINT;
}
-void __init gic_init_physaddr(struct device_node *node)
+static void __init gic_init_physaddr(struct device_node *node)
{
struct resource res;
if (of_address_to_resource(node, 0, &res) == 0) {
diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
new file mode 100644
index 0000000..84b01de
--- /dev/null
+++ b/drivers/irqchip/irq-jcore-aic.c
@@ -0,0 +1,95 @@
+/*
+ * J-Core SoC AIC driver
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define JCORE_AIC_MAX_HWIRQ 127
+#define JCORE_AIC1_MIN_HWIRQ 16
+#define JCORE_AIC2_MIN_HWIRQ 64
+
+#define JCORE_AIC1_INTPRI_REG 8
+
+static struct irq_chip jcore_aic;
+
+static int jcore_aic_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_chip *aic = d->host_data;
+
+ irq_set_chip_and_handler(irq, aic, handle_simple_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops jcore_aic_irqdomain_ops = {
+ .map = jcore_aic_irqdomain_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static void noop(struct irq_data *data)
+{
+}
+
+static int __init aic_irq_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ unsigned min_irq = JCORE_AIC2_MIN_HWIRQ;
+ unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1;
+ struct irq_domain *domain;
+
+ pr_info("Initializing J-Core AIC\n");
+
+ /* AIC1 needs priority initialization to receive interrupts. */
+ if (of_device_is_compatible(node, "jcore,aic1")) {
+ unsigned cpu;
+
+ for_each_present_cpu(cpu) {
+ void __iomem *base = of_iomap(node, cpu);
+
+ if (!base) {
+ pr_err("Unable to map AIC for cpu %u\n", cpu);
+ return -ENOMEM;
+ }
+ __raw_writel(0xffffffff, base + JCORE_AIC1_INTPRI_REG);
+ iounmap(base);
+ }
+ min_irq = JCORE_AIC1_MIN_HWIRQ;
+ }
+
+ /*
+ * The irq chip framework requires either mask/unmask or enable/disable
+ * function pointers to be provided, but the hardware does not have any
+ * such mechanism; the only interrupt masking is at the cpu level and
+ * it affects all interrupts. We provide dummy mask/unmask. The hardware
+ * handles all interrupt control and clears pending status when the cpu
+ * accepts the interrupt.
+ */
+ jcore_aic.irq_mask = noop;
+ jcore_aic.irq_unmask = noop;
+ jcore_aic.name = "AIC";
+
+ domain = irq_domain_add_linear(node, dom_sz, &jcore_aic_irqdomain_ops,
+ &jcore_aic);
+ if (!domain)
+ return -ENOMEM;
+ irq_create_strict_mappings(domain, min_irq, min_irq, dom_sz - min_irq);
+
+ return 0;
+}
+
+IRQCHIP_DECLARE(jcore_aic2, "jcore,aic2", aic_irq_of_init);
+IRQCHIP_DECLARE(jcore_aic1, "jcore,aic1", aic_irq_of_init);
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index deb89d6..54a5e87 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -109,7 +109,7 @@
dev_dbg(kirq->dev, "dispatch bit %d, virq %d\n",
src, virq);
if (!virq)
- dev_warn(kirq->dev, "sporious irq detected hwirq %d, virq %d\n",
+ dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n",
src, virq);
generic_handle_irq(virq);
}
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 83f4983..c0178a1 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -371,18 +371,13 @@
bitmap_and(pending, pending, intrmask, gic_shared_intrs);
bitmap_and(pending, pending, pcpu_mask, gic_shared_intrs);
- intr = find_first_bit(pending, gic_shared_intrs);
- while (intr != gic_shared_intrs) {
+ for_each_set_bit(intr, pending, gic_shared_intrs) {
virq = irq_linear_revmap(gic_irq_domain,
GIC_SHARED_TO_HWIRQ(intr));
if (chained)
generic_handle_irq(virq);
else
do_IRQ(virq);
-
- /* go to next pending bit */
- bitmap_clear(pending, intr, 1);
- intr = find_first_bit(pending, gic_shared_intrs);
}
}
@@ -518,18 +513,13 @@
bitmap_and(&pending, &pending, &masked, GIC_NUM_LOCAL_INTRS);
- intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS);
- while (intr != GIC_NUM_LOCAL_INTRS) {
+ for_each_set_bit(intr, &pending, GIC_NUM_LOCAL_INTRS) {
virq = irq_linear_revmap(gic_irq_domain,
GIC_LOCAL_TO_HWIRQ(intr));
if (chained)
generic_handle_irq(virq);
else
do_IRQ(virq);
-
- /* go to next pending bit */
- bitmap_clear(&pending, intr, 1);
- intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS);
}
}
@@ -638,27 +628,6 @@
if (!gic_local_irq_is_routable(intr))
return -EPERM;
- /*
- * HACK: These are all really percpu interrupts, but the rest
- * of the MIPS kernel code does not use the percpu IRQ API for
- * the CP0 timer and performance counter interrupts.
- */
- switch (intr) {
- case GIC_LOCAL_INT_TIMER:
- case GIC_LOCAL_INT_PERFCTR:
- case GIC_LOCAL_INT_FDC:
- irq_set_chip_and_handler(virq,
- &gic_all_vpes_local_irq_controller,
- handle_percpu_irq);
- break;
- default:
- irq_set_chip_and_handler(virq,
- &gic_local_irq_controller,
- handle_percpu_devid_irq);
- irq_set_percpu_devid(virq);
- break;
- }
-
spin_lock_irqsave(&gic_lock, flags);
for (i = 0; i < gic_vpes; i++) {
u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin;
@@ -724,16 +693,42 @@
return 0;
}
-static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
- irq_hw_number_t hw)
+static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq,
+ unsigned int hwirq)
{
- if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
- return gic_local_irq_domain_map(d, virq, hw);
+ struct irq_chip *chip;
+ int err;
- irq_set_chip_and_handler(virq, &gic_level_irq_controller,
- handle_level_irq);
+ if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
+ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+ &gic_level_irq_controller,
+ NULL);
+ } else {
+ switch (GIC_HWIRQ_TO_LOCAL(hwirq)) {
+ case GIC_LOCAL_INT_TIMER:
+ case GIC_LOCAL_INT_PERFCTR:
+ case GIC_LOCAL_INT_FDC:
+ /*
+ * HACK: These are all really percpu interrupts, but
+ * the rest of the MIPS kernel code does not use the
+ * percpu IRQ API for them.
+ */
+ chip = &gic_all_vpes_local_irq_controller;
+ irq_set_handler(virq, handle_percpu_irq);
+ break;
- return gic_shared_irq_domain_map(d, virq, hw, 0);
+ default:
+ chip = &gic_local_irq_controller;
+ irq_set_handler(virq, handle_percpu_devid_irq);
+ irq_set_percpu_devid(virq);
+ break;
+ }
+
+ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+ chip, NULL);
+ }
+
+ return err;
}
static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
@@ -744,15 +739,12 @@
int cpu, ret, i;
if (spec->type == GIC_DEVICE) {
- /* verify that it doesn't conflict with an IPI irq */
- if (test_bit(spec->hwirq, ipi_resrv))
+ /* verify that shared irqs don't conflict with an IPI irq */
+ if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) &&
+ test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv))
return -EBUSY;
- hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
-
- return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
- &gic_level_irq_controller,
- NULL);
+ return gic_setup_dev_chip(d, virq, spec->hwirq);
} else {
base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
if (base_hwirq == gic_shared_intrs) {
@@ -821,7 +813,6 @@
}
static const struct irq_domain_ops gic_irq_domain_ops = {
- .map = gic_irq_domain_map,
.alloc = gic_irq_domain_alloc,
.free = gic_irq_domain_free,
.match = gic_irq_domain_match,
@@ -852,29 +843,20 @@
struct irq_fwspec *fwspec = arg;
struct gic_irq_spec spec = {
.type = GIC_DEVICE,
- .hwirq = fwspec->param[1],
};
int i, ret;
- bool is_shared = fwspec->param[0] == GIC_SHARED;
- if (is_shared) {
- ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
- if (ret)
- return ret;
- }
+ if (fwspec->param[0] == GIC_SHARED)
+ spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]);
+ else
+ spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]);
+
+ ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
+ if (ret)
+ return ret;
for (i = 0; i < nr_irqs; i++) {
- irq_hw_number_t hwirq;
-
- if (is_shared)
- hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i);
- else
- hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i);
-
- ret = irq_domain_set_hwirq_and_chip(d, virq + i,
- hwirq,
- &gic_level_irq_controller,
- NULL);
+ ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i);
if (ret)
goto error;
}
@@ -896,7 +878,10 @@
static void gic_dev_domain_activate(struct irq_domain *domain,
struct irq_data *d)
{
- gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
+ if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS)
+ gic_local_irq_domain_map(domain, d->irq, d->hwirq);
+ else
+ gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
}
static struct irq_domain_ops gic_dev_domain_ops = {
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
new file mode 100644
index 0000000..eec6395
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2016 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#define PIC_CAUSE 0x0
+#define PIC_MASK 0x4
+
+#define PIC_MAX_IRQS 32
+#define PIC_MAX_IRQ_MASK ((1UL << PIC_MAX_IRQS) - 1)
+
+struct mvebu_pic {
+ void __iomem *base;
+ u32 parent_irq;
+ struct irq_domain *domain;
+ struct irq_chip irq_chip;
+};
+
+static void mvebu_pic_reset(struct mvebu_pic *pic)
+{
+ /* ACK and mask all interrupts */
+ writel(0, pic->base + PIC_MASK);
+ writel(PIC_MAX_IRQ_MASK, pic->base + PIC_CAUSE);
+}
+
+static void mvebu_pic_eoi_irq(struct irq_data *d)
+{
+ struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+
+ writel(1 << d->hwirq, pic->base + PIC_CAUSE);
+}
+
+static void mvebu_pic_mask_irq(struct irq_data *d)
+{
+ struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+ u32 reg;
+
+ reg = readl(pic->base + PIC_MASK);
+ reg |= (1 << d->hwirq);
+ writel(reg, pic->base + PIC_MASK);
+}
+
+static void mvebu_pic_unmask_irq(struct irq_data *d)
+{
+ struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+ u32 reg;
+
+ reg = readl(pic->base + PIC_MASK);
+ reg &= ~(1 << d->hwirq);
+ writel(reg, pic->base + PIC_MASK);
+}
+
+static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct mvebu_pic *pic = domain->host_data;
+
+ irq_set_percpu_devid(virq);
+ irq_set_chip_data(virq, pic);
+ irq_set_chip_and_handler(virq, &pic->irq_chip,
+ handle_percpu_devid_irq);
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_probe(virq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops mvebu_pic_domain_ops = {
+ .map = mvebu_pic_irq_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static void mvebu_pic_handle_cascade_irq(struct irq_desc *desc)
+{
+ struct mvebu_pic *pic = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned long irqmap, irqn;
+ unsigned int cascade_irq;
+
+ irqmap = readl_relaxed(pic->base + PIC_CAUSE);
+ chained_irq_enter(chip, desc);
+
+ for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) {
+ cascade_irq = irq_find_mapping(pic->domain, irqn);
+ generic_handle_irq(cascade_irq);
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static void mvebu_pic_enable_percpu_irq(void *data)
+{
+ struct mvebu_pic *pic = data;
+
+ mvebu_pic_reset(pic);
+ enable_percpu_irq(pic->parent_irq, IRQ_TYPE_NONE);
+}
+
+static void mvebu_pic_disable_percpu_irq(void *data)
+{
+ struct mvebu_pic *pic = data;
+
+ disable_percpu_irq(pic->parent_irq);
+}
+
+static int mvebu_pic_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct mvebu_pic *pic;
+ struct irq_chip *irq_chip;
+ struct resource *res;
+
+ pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL);
+ if (!pic)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ pic->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(pic->base))
+ return PTR_ERR(pic->base);
+
+ irq_chip = &pic->irq_chip;
+ irq_chip->name = dev_name(&pdev->dev);
+ irq_chip->irq_mask = mvebu_pic_mask_irq;
+ irq_chip->irq_unmask = mvebu_pic_unmask_irq;
+ irq_chip->irq_eoi = mvebu_pic_eoi_irq;
+
+ pic->parent_irq = irq_of_parse_and_map(node, 0);
+ if (pic->parent_irq <= 0) {
+ dev_err(&pdev->dev, "Failed to parse parent interrupt\n");
+ return -EINVAL;
+ }
+
+ pic->domain = irq_domain_add_linear(node, PIC_MAX_IRQS,
+ &mvebu_pic_domain_ops, pic);
+ if (!pic->domain) {
+ dev_err(&pdev->dev, "Failed to allocate irq domain\n");
+ return -ENOMEM;
+ }
+
+ irq_set_chained_handler(pic->parent_irq, mvebu_pic_handle_cascade_irq);
+ irq_set_handler_data(pic->parent_irq, pic);
+
+ on_each_cpu(mvebu_pic_enable_percpu_irq, pic, 1);
+
+ platform_set_drvdata(pdev, pic);
+
+ return 0;
+}
+
+static int mvebu_pic_remove(struct platform_device *pdev)
+{
+ struct mvebu_pic *pic = platform_get_drvdata(pdev);
+
+ on_each_cpu(mvebu_pic_disable_percpu_irq, pic, 1);
+ irq_domain_remove(pic->domain);
+
+ return 0;
+}
+
+static const struct of_device_id mvebu_pic_of_match[] = {
+ { .compatible = "marvell,armada-8k-pic", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mvebu_pic_of_match);
+
+static struct platform_driver mvebu_pic_driver = {
+ .probe = mvebu_pic_probe,
+ .remove = mvebu_pic_remove,
+ .driver = {
+ .name = "mvebu-pic",
+ .of_match_table = mvebu_pic_of_match,
+ },
+};
+module_platform_driver(mvebu_pic_driver);
+
+MODULE_AUTHOR("Yehuda Yitschak <yehuday@marvell.com>");
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:mvebu_pic");
+
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
new file mode 100644
index 0000000..491568c
--- /dev/null
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * License terms: GNU General Public License (GPL), version 2
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define EXTI_IMR 0x0
+#define EXTI_EMR 0x4
+#define EXTI_RTSR 0x8
+#define EXTI_FTSR 0xc
+#define EXTI_SWIER 0x10
+#define EXTI_PR 0x14
+
+static void stm32_irq_handler(struct irq_desc *desc)
+{
+ struct irq_domain *domain = irq_desc_get_handler_data(desc);
+ struct irq_chip_generic *gc = domain->gc->gc[0];
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned long pending;
+ int n;
+
+ chained_irq_enter(chip, desc);
+
+ while ((pending = irq_reg_readl(gc, EXTI_PR))) {
+ for_each_set_bit(n, &pending, BITS_PER_LONG) {
+ generic_handle_irq(irq_find_mapping(domain, n));
+ irq_reg_writel(gc, BIT(n), EXTI_PR);
+ }
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static int stm32_irq_set_type(struct irq_data *data, unsigned int type)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ int pin = data->hwirq;
+ u32 rtsr, ftsr;
+
+ irq_gc_lock(gc);
+
+ rtsr = irq_reg_readl(gc, EXTI_RTSR);
+ ftsr = irq_reg_readl(gc, EXTI_FTSR);
+
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ rtsr |= BIT(pin);
+ ftsr &= ~BIT(pin);
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ rtsr &= ~BIT(pin);
+ ftsr |= BIT(pin);
+ break;
+ case IRQ_TYPE_EDGE_BOTH:
+ rtsr |= BIT(pin);
+ ftsr |= BIT(pin);
+ break;
+ default:
+ irq_gc_unlock(gc);
+ return -EINVAL;
+ }
+
+ irq_reg_writel(gc, rtsr, EXTI_RTSR);
+ irq_reg_writel(gc, ftsr, EXTI_FTSR);
+
+ irq_gc_unlock(gc);
+
+ return 0;
+}
+
+static int stm32_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ int pin = data->hwirq;
+ u32 emr;
+
+ irq_gc_lock(gc);
+
+ emr = irq_reg_readl(gc, EXTI_EMR);
+ if (on)
+ emr |= BIT(pin);
+ else
+ emr &= ~BIT(pin);
+ irq_reg_writel(gc, emr, EXTI_EMR);
+
+ irq_gc_unlock(gc);
+
+ return 0;
+}
+
+static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq,
+ unsigned int nr_irqs, void *data)
+{
+ struct irq_chip_generic *gc = d->gc->gc[0];
+ struct irq_fwspec *fwspec = data;
+ irq_hw_number_t hwirq;
+
+ hwirq = fwspec->param[0];
+
+ irq_map_generic_chip(d, virq, hwirq);
+ irq_domain_set_info(d, virq, hwirq, &gc->chip_types->chip, gc,
+ handle_simple_irq, NULL, NULL);
+
+ return 0;
+}
+
+static void stm32_exti_free(struct irq_domain *d, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *data = irq_domain_get_irq_data(d, virq);
+
+ irq_domain_reset_irq_data(data);
+}
+
+struct irq_domain_ops irq_exti_domain_ops = {
+ .map = irq_map_generic_chip,
+ .xlate = irq_domain_xlate_onetwocell,
+ .alloc = stm32_exti_alloc,
+ .free = stm32_exti_free,
+};
+
+static int __init stm32_exti_init(struct device_node *node,
+ struct device_node *parent)
+{
+ unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+ int nr_irqs, nr_exti, ret, i;
+ struct irq_chip_generic *gc;
+ struct irq_domain *domain;
+ void *base;
+
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("%s: Unable to map registers\n", node->full_name);
+ return -ENOMEM;
+ }
+
+ /* Determine number of irqs supported */
+ writel_relaxed(~0UL, base + EXTI_RTSR);
+ nr_exti = fls(readl_relaxed(base + EXTI_RTSR));
+ writel_relaxed(0, base + EXTI_RTSR);
+
+ pr_info("%s: %d External IRQs detected\n", node->full_name, nr_exti);
+
+ domain = irq_domain_add_linear(node, nr_exti,
+ &irq_exti_domain_ops, NULL);
+ if (!domain) {
+ pr_err("%s: Could not register interrupt domain.\n",
+ node->name);
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ ret = irq_alloc_domain_generic_chips(domain, nr_exti, 1, "exti",
+ handle_edge_irq, clr, 0, 0);
+ if (ret) {
+ pr_err("%s: Could not allocate generic interrupt chip.\n",
+ node->full_name);
+ goto out_free_domain;
+ }
+
+ gc = domain->gc->gc[0];
+ gc->reg_base = base;
+ gc->chip_types->type = IRQ_TYPE_EDGE_BOTH;
+ gc->chip_types->chip.name = gc->chip_types[0].chip.name;
+ gc->chip_types->chip.irq_ack = irq_gc_ack_set_bit;
+ gc->chip_types->chip.irq_mask = irq_gc_mask_clr_bit;
+ gc->chip_types->chip.irq_unmask = irq_gc_mask_set_bit;
+ gc->chip_types->chip.irq_set_type = stm32_irq_set_type;
+ gc->chip_types->chip.irq_set_wake = stm32_irq_set_wake;
+ gc->chip_types->regs.ack = EXTI_PR;
+ gc->chip_types->regs.mask = EXTI_IMR;
+ gc->chip_types->handler = handle_edge_irq;
+
+ nr_irqs = of_irq_count(node);
+ for (i = 0; i < nr_irqs; i++) {
+ unsigned int irq = irq_of_parse_and_map(node, i);
+
+ irq_set_handler_data(irq, domain);
+ irq_set_chained_handler(irq, stm32_irq_handler);
+ }
+
+ return 0;
+
+out_free_domain:
+ irq_domain_remove(domain);
+out_unmap:
+ iounmap(base);
+ return ret;
+}
+
+IRQCHIP_DECLARE(stm32_exti, "st,stm32-exti", stm32_exti_init);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 9ebd2cf..c784ddc 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -1171,27 +1171,10 @@
.nodename = "lightnvm/control",
.fops = &_ctl_fops,
};
+module_misc_device(_nvm_misc);
MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
-static int __init nvm_mod_init(void)
-{
- int ret;
-
- ret = misc_register(&_nvm_misc);
- if (ret)
- pr_err("nvm: misc_register failed for control device");
-
- return ret;
-}
-
-static void __exit nvm_mod_exit(void)
-{
- misc_deregister(&_nvm_misc);
-}
-
MODULE_AUTHOR("Matias Bjorling <m@bjorling.me>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.1");
-module_init(nvm_mod_init);
-module_exit(nvm_mod_exit);
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 043828d..08c87fa 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -59,6 +59,7 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/platform_device.h>
#include <linux/mailbox_controller.h>
@@ -68,11 +69,16 @@
#include "mailbox.h"
#define MAX_PCC_SUBSPACES 256
+#define MBOX_IRQ_NAME "pcc-mbox"
static struct mbox_chan *pcc_mbox_channels;
/* Array of cached virtual address for doorbell registers */
static void __iomem **pcc_doorbell_vaddr;
+/* Array of cached virtual address for doorbell ack registers */
+static void __iomem **pcc_doorbell_ack_vaddr;
+/* Array of doorbell interrupts */
+static int *pcc_doorbell_irq;
static struct mbox_controller pcc_mbox_ctrl = {};
/**
@@ -91,79 +97,6 @@
return &pcc_mbox_channels[id];
}
-/**
- * pcc_mbox_request_channel - PCC clients call this function to
- * request a pointer to their PCC subspace, from which they
- * can get the details of communicating with the remote.
- * @cl: Pointer to Mailbox client, so we know where to bind the
- * Channel.
- * @subspace_id: The PCC Subspace index as parsed in the PCC client
- * ACPI package. This is used to lookup the array of PCC
- * subspaces as parsed by the PCC Mailbox controller.
- *
- * Return: Pointer to the Mailbox Channel if successful or
- * ERR_PTR.
- */
-struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
- int subspace_id)
-{
- struct device *dev = pcc_mbox_ctrl.dev;
- struct mbox_chan *chan;
- unsigned long flags;
-
- /*
- * Each PCC Subspace is a Mailbox Channel.
- * The PCC Clients get their PCC Subspace ID
- * from their own tables and pass it here.
- * This returns a pointer to the PCC subspace
- * for the Client to operate on.
- */
- chan = get_pcc_channel(subspace_id);
-
- if (IS_ERR(chan) || chan->cl) {
- dev_err(dev, "Channel not found for idx: %d\n", subspace_id);
- return ERR_PTR(-EBUSY);
- }
-
- spin_lock_irqsave(&chan->lock, flags);
- chan->msg_free = 0;
- chan->msg_count = 0;
- chan->active_req = NULL;
- chan->cl = cl;
- init_completion(&chan->tx_complete);
-
- if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
- chan->txdone_method |= TXDONE_BY_ACK;
-
- spin_unlock_irqrestore(&chan->lock, flags);
-
- return chan;
-}
-EXPORT_SYMBOL_GPL(pcc_mbox_request_channel);
-
-/**
- * pcc_mbox_free_channel - Clients call this to free their Channel.
- *
- * @chan: Pointer to the mailbox channel as returned by
- * pcc_mbox_request_channel()
- */
-void pcc_mbox_free_channel(struct mbox_chan *chan)
-{
- unsigned long flags;
-
- if (!chan || !chan->cl)
- return;
-
- spin_lock_irqsave(&chan->lock, flags);
- chan->cl = NULL;
- chan->active_req = NULL;
- if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
- chan->txdone_method = TXDONE_BY_POLL;
-
- spin_unlock_irqrestore(&chan->lock, flags);
-}
-EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
-
/*
* PCC can be used with perf critical drivers such as CPPC
* So it makes sense to locally cache the virtual address and
@@ -225,6 +158,167 @@
}
/**
+ * pcc_map_interrupt - Map a PCC subspace GSI to a linux IRQ number
+ * @interrupt: GSI number.
+ * @flags: interrupt flags
+ *
+ * Returns: a valid linux IRQ number on success
+ * 0 or -EINVAL on failure
+ */
+static int pcc_map_interrupt(u32 interrupt, u32 flags)
+{
+ int trigger, polarity;
+
+ if (!interrupt)
+ return 0;
+
+ trigger = (flags & ACPI_PCCT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE
+ : ACPI_LEVEL_SENSITIVE;
+
+ polarity = (flags & ACPI_PCCT_INTERRUPT_POLARITY) ? ACPI_ACTIVE_LOW
+ : ACPI_ACTIVE_HIGH;
+
+ return acpi_register_gsi(NULL, interrupt, trigger, polarity);
+}
+
+/**
+ * pcc_mbox_irq - PCC mailbox interrupt handler
+ */
+static irqreturn_t pcc_mbox_irq(int irq, void *p)
+{
+ struct acpi_generic_address *doorbell_ack;
+ struct acpi_pcct_hw_reduced *pcct_ss;
+ struct mbox_chan *chan = p;
+ u64 doorbell_ack_preserve;
+ u64 doorbell_ack_write;
+ u64 doorbell_ack_val;
+ int ret;
+
+ pcct_ss = chan->con_priv;
+
+ mbox_chan_received_data(chan, NULL);
+
+ if (pcct_ss->header.type == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) {
+ struct acpi_pcct_hw_reduced_type2 *pcct2_ss = chan->con_priv;
+ u32 id = chan - pcc_mbox_channels;
+
+ doorbell_ack = &pcct2_ss->doorbell_ack_register;
+ doorbell_ack_preserve = pcct2_ss->ack_preserve_mask;
+ doorbell_ack_write = pcct2_ss->ack_write_mask;
+
+ ret = read_register(pcc_doorbell_ack_vaddr[id],
+ &doorbell_ack_val,
+ doorbell_ack->bit_width);
+ if (ret)
+ return IRQ_NONE;
+
+ ret = write_register(pcc_doorbell_ack_vaddr[id],
+ (doorbell_ack_val & doorbell_ack_preserve)
+ | doorbell_ack_write,
+ doorbell_ack->bit_width);
+ if (ret)
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * pcc_mbox_request_channel - PCC clients call this function to
+ * request a pointer to their PCC subspace, from which they
+ * can get the details of communicating with the remote.
+ * @cl: Pointer to Mailbox client, so we know where to bind the
+ * Channel.
+ * @subspace_id: The PCC Subspace index as parsed in the PCC client
+ * ACPI package. This is used to lookup the array of PCC
+ * subspaces as parsed by the PCC Mailbox controller.
+ *
+ * Return: Pointer to the Mailbox Channel if successful or
+ * ERR_PTR.
+ */
+struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
+ int subspace_id)
+{
+ struct device *dev = pcc_mbox_ctrl.dev;
+ struct mbox_chan *chan;
+ unsigned long flags;
+
+ /*
+ * Each PCC Subspace is a Mailbox Channel.
+ * The PCC Clients get their PCC Subspace ID
+ * from their own tables and pass it here.
+ * This returns a pointer to the PCC subspace
+ * for the Client to operate on.
+ */
+ chan = get_pcc_channel(subspace_id);
+
+ if (IS_ERR(chan) || chan->cl) {
+ dev_err(dev, "Channel not found for idx: %d\n", subspace_id);
+ return ERR_PTR(-EBUSY);
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->msg_free = 0;
+ chan->msg_count = 0;
+ chan->active_req = NULL;
+ chan->cl = cl;
+ init_completion(&chan->tx_complete);
+
+ if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
+ chan->txdone_method |= TXDONE_BY_ACK;
+
+ if (pcc_doorbell_irq[subspace_id] > 0) {
+ int rc;
+
+ rc = devm_request_irq(dev, pcc_doorbell_irq[subspace_id],
+ pcc_mbox_irq, 0, MBOX_IRQ_NAME, chan);
+ if (unlikely(rc)) {
+ dev_err(dev, "failed to register PCC interrupt %d\n",
+ pcc_doorbell_irq[subspace_id]);
+ chan = ERR_PTR(rc);
+ }
+ }
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(pcc_mbox_request_channel);
+
+/**
+ * pcc_mbox_free_channel - Clients call this to free their Channel.
+ *
+ * @chan: Pointer to the mailbox channel as returned by
+ * pcc_mbox_request_channel()
+ */
+void pcc_mbox_free_channel(struct mbox_chan *chan)
+{
+ u32 id = chan - pcc_mbox_channels;
+ unsigned long flags;
+
+ if (!chan || !chan->cl)
+ return;
+
+ if (id >= pcc_mbox_ctrl.num_chans) {
+ pr_debug("pcc_mbox_free_channel: Invalid mbox_chan passed\n");
+ return;
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->cl = NULL;
+ chan->active_req = NULL;
+ if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+ chan->txdone_method = TXDONE_BY_POLL;
+
+ if (pcc_doorbell_irq[id] > 0)
+ devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
+
+
+/**
* pcc_send_data - Called from Mailbox Controller code. Used
* here only to ring the channel doorbell. The PCC client
* specific read/write is done in the client driver in
@@ -296,8 +390,10 @@
if (pcc_mbox_ctrl.num_chans <= MAX_PCC_SUBSPACES) {
pcct_ss = (struct acpi_pcct_hw_reduced *) header;
- if (pcct_ss->header.type !=
- ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE) {
+ if ((pcct_ss->header.type !=
+ ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE)
+ && (pcct_ss->header.type !=
+ ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2)) {
pr_err("Incorrect PCC Subspace type detected\n");
return -EINVAL;
}
@@ -307,6 +403,43 @@
}
/**
+ * pcc_parse_subspace_irq - Parse the PCC IRQ and PCC ACK register
+ * There should be one entry per PCC client.
+ * @id: PCC subspace index.
+ * @pcct_ss: Pointer to the ACPI subtable header under the PCCT.
+ *
+ * Return: 0 for Success, else errno.
+ *
+ * This gets called for each entry in the PCC table.
+ */
+static int pcc_parse_subspace_irq(int id,
+ struct acpi_pcct_hw_reduced *pcct_ss)
+{
+ pcc_doorbell_irq[id] = pcc_map_interrupt(pcct_ss->doorbell_interrupt,
+ (u32)pcct_ss->flags);
+ if (pcc_doorbell_irq[id] <= 0) {
+ pr_err("PCC GSI %d not registered\n",
+ pcct_ss->doorbell_interrupt);
+ return -EINVAL;
+ }
+
+ if (pcct_ss->header.type
+ == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) {
+ struct acpi_pcct_hw_reduced_type2 *pcct2_ss = (void *)pcct_ss;
+
+ pcc_doorbell_ack_vaddr[id] = acpi_os_ioremap(
+ pcct2_ss->doorbell_ack_register.address,
+ pcct2_ss->doorbell_ack_register.bit_width / 8);
+ if (!pcc_doorbell_ack_vaddr[id]) {
+ pr_err("Failed to ioremap PCC ACK register\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/**
* acpi_pcc_probe - Parse the ACPI tree for the PCCT.
*
* Return: 0 for Success, else errno.
@@ -316,7 +449,9 @@
acpi_size pcct_tbl_header_size;
struct acpi_table_header *pcct_tbl;
struct acpi_subtable_header *pcct_entry;
- int count, i;
+ struct acpi_table_pcct *acpi_pcct_tbl;
+ int count, i, rc;
+ int sum = 0;
acpi_status status = AE_OK;
/* Search for PCCT */
@@ -333,37 +468,66 @@
sizeof(struct acpi_table_pcct),
ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE,
parse_pcc_subspace, MAX_PCC_SUBSPACES);
+ sum += (count > 0) ? count : 0;
- if (count <= 0) {
+ count = acpi_table_parse_entries(ACPI_SIG_PCCT,
+ sizeof(struct acpi_table_pcct),
+ ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2,
+ parse_pcc_subspace, MAX_PCC_SUBSPACES);
+ sum += (count > 0) ? count : 0;
+
+ if (sum == 0 || sum >= MAX_PCC_SUBSPACES) {
pr_err("Error parsing PCC subspaces from PCCT\n");
return -EINVAL;
}
pcc_mbox_channels = kzalloc(sizeof(struct mbox_chan) *
- count, GFP_KERNEL);
-
+ sum, GFP_KERNEL);
if (!pcc_mbox_channels) {
pr_err("Could not allocate space for PCC mbox channels\n");
return -ENOMEM;
}
- pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL);
+ pcc_doorbell_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
if (!pcc_doorbell_vaddr) {
- kfree(pcc_mbox_channels);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto err_free_mbox;
+ }
+
+ pcc_doorbell_ack_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
+ if (!pcc_doorbell_ack_vaddr) {
+ rc = -ENOMEM;
+ goto err_free_db_vaddr;
+ }
+
+ pcc_doorbell_irq = kcalloc(sum, sizeof(int), GFP_KERNEL);
+ if (!pcc_doorbell_irq) {
+ rc = -ENOMEM;
+ goto err_free_db_ack_vaddr;
}
/* Point to the first PCC subspace entry */
pcct_entry = (struct acpi_subtable_header *) (
(unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct));
- for (i = 0; i < count; i++) {
+ acpi_pcct_tbl = (struct acpi_table_pcct *) pcct_tbl;
+ if (acpi_pcct_tbl->flags & ACPI_PCCT_DOORBELL)
+ pcc_mbox_ctrl.txdone_irq = true;
+
+ for (i = 0; i < sum; i++) {
struct acpi_generic_address *db_reg;
struct acpi_pcct_hw_reduced *pcct_ss;
pcc_mbox_channels[i].con_priv = pcct_entry;
+ pcct_ss = (struct acpi_pcct_hw_reduced *) pcct_entry;
+
+ if (pcc_mbox_ctrl.txdone_irq) {
+ rc = pcc_parse_subspace_irq(i, pcct_ss);
+ if (rc < 0)
+ goto err;
+ }
+
/* If doorbell is in system memory cache the virt address */
- pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry;
db_reg = &pcct_ss->doorbell_register;
if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address,
@@ -372,11 +536,21 @@
((unsigned long) pcct_entry + pcct_entry->length);
}
- pcc_mbox_ctrl.num_chans = count;
+ pcc_mbox_ctrl.num_chans = sum;
pr_info("Detected %d PCC Subspaces\n", pcc_mbox_ctrl.num_chans);
return 0;
+
+err:
+ kfree(pcc_doorbell_irq);
+err_free_db_ack_vaddr:
+ kfree(pcc_doorbell_ack_vaddr);
+err_free_db_vaddr:
+ kfree(pcc_doorbell_vaddr);
+err_free_mbox:
+ kfree(pcc_mbox_channels);
+ return rc;
}
/**
diff --git a/drivers/mcb/Kconfig b/drivers/mcb/Kconfig
index e9a6976..76d9c51 100644
--- a/drivers/mcb/Kconfig
+++ b/drivers/mcb/Kconfig
@@ -28,4 +28,13 @@
If build as a module, the module is called mcb-pci.ko
+config MCB_LPC
+ tristate "LPC (non PCI) based MCB carrier"
+ default n
+ help
+
+ This is a MCB carrier on a LPC or non PCI device.
+
+ If build as a module, the module is called mcb-lpc.ko
+
endif # MCB
diff --git a/drivers/mcb/Makefile b/drivers/mcb/Makefile
index 1ae1413..bcc7745 100644
--- a/drivers/mcb/Makefile
+++ b/drivers/mcb/Makefile
@@ -5,3 +5,4 @@
mcb-y += mcb-parse.o
obj-$(CONFIG_MCB_PCI) += mcb-pci.o
+obj-$(CONFIG_MCB_LPC) += mcb-lpc.o
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index 6f2c852..921a5d2 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -233,6 +233,7 @@
dev->dev.bus = &mcb_bus_type;
dev->dev.parent = bus->dev.parent;
dev->dev.release = mcb_release_dev;
+ dev->dma_dev = bus->carrier;
device_id = dev->id;
dev_set_name(&dev->dev, "mcb%d-16z%03d-%d:%d:%d",
@@ -369,7 +370,6 @@
if (!dev)
return NULL;
- INIT_LIST_HEAD(&dev->bus_list);
dev->bus = bus;
return dev;
@@ -405,20 +405,6 @@
return 0;
}
-static int __mcb_bus_add_child(struct device *dev, void *data)
-{
- struct mcb_device *mdev = to_mcb_device(dev);
- struct mcb_bus *child;
-
- BUG_ON(!mdev->is_added);
- child = mdev->subordinate;
-
- if (child)
- mcb_bus_add_devices(child);
-
- return 0;
-}
-
/**
* mcb_bus_add_devices() - Add devices in the bus' internal device list
* @bus: The @mcb_bus we add the devices
@@ -428,8 +414,6 @@
void mcb_bus_add_devices(const struct mcb_bus *bus)
{
bus_for_each_dev(&mcb_bus_type, NULL, NULL, __mcb_bus_add_devices);
- bus_for_each_dev(&mcb_bus_type, NULL, NULL, __mcb_bus_add_child);
-
}
EXPORT_SYMBOL_GPL(mcb_bus_add_devices);
diff --git a/drivers/mcb/mcb-internal.h b/drivers/mcb/mcb-internal.h
index 5254e02..d6e6933 100644
--- a/drivers/mcb/mcb-internal.h
+++ b/drivers/mcb/mcb-internal.h
@@ -112,6 +112,15 @@
u32 size;
} __packed;
+struct chameleon_bar {
+ u32 addr;
+ u32 size;
+};
+
+#define BAR_CNT(x) ((x) & 0x07)
+#define CHAMELEON_BAR_MAX 6
+#define BAR_DESC_SIZE(x) ((x) * sizeof(struct chameleon_bar) + sizeof(__le32))
+
int chameleon_parse_cells(struct mcb_bus *bus, phys_addr_t mapbase,
void __iomem *base);
diff --git a/drivers/mcb/mcb-lpc.c b/drivers/mcb/mcb-lpc.c
new file mode 100644
index 0000000..d072c08
--- /dev/null
+++ b/drivers/mcb/mcb-lpc.c
@@ -0,0 +1,158 @@
+/*
+ * MEN Chameleon Bus.
+ *
+ * Copyright (C) 2014 MEN Mikroelektronik GmbH (www.men.de)
+ * Author: Andreas Werner <andreas.werner@men.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/mcb.h>
+#include <linux/io.h>
+#include "mcb-internal.h"
+
+struct priv {
+ struct mcb_bus *bus;
+ struct resource *mem;
+ void __iomem *base;
+};
+
+static int mcb_lpc_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ struct priv *priv;
+ int ret = 0;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!priv->mem) {
+ dev_err(&pdev->dev, "No Memory resource\n");
+ return -ENODEV;
+ }
+
+ res = devm_request_mem_region(&pdev->dev, priv->mem->start,
+ resource_size(priv->mem),
+ KBUILD_MODNAME);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to request IO memory\n");
+ return -EBUSY;
+ }
+
+ priv->base = devm_ioremap(&pdev->dev, priv->mem->start,
+ resource_size(priv->mem));
+ if (!priv->base) {
+ dev_err(&pdev->dev, "Cannot ioremap\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pdev, priv);
+
+ priv->bus = mcb_alloc_bus(&pdev->dev);
+ if (IS_ERR(priv->bus))
+ return PTR_ERR(priv->bus);
+
+ ret = chameleon_parse_cells(priv->bus, priv->mem->start, priv->base);
+ if (ret < 0) {
+ mcb_release_bus(priv->bus);
+ return ret;
+ }
+
+ dev_dbg(&pdev->dev, "Found %d cells\n", ret);
+
+ mcb_bus_add_devices(priv->bus);
+
+ return 0;
+
+}
+
+static int mcb_lpc_remove(struct platform_device *pdev)
+{
+ struct priv *priv = platform_get_drvdata(pdev);
+
+ mcb_release_bus(priv->bus);
+
+ return 0;
+}
+
+static struct platform_device *mcb_lpc_pdev;
+
+static int mcb_lpc_create_platform_device(const struct dmi_system_id *id)
+{
+ struct resource *res = id->driver_data;
+ int ret;
+
+ mcb_lpc_pdev = platform_device_alloc("mcb-lpc", -1);
+ if (!mcb_lpc_pdev)
+ return -ENOMEM;
+
+ ret = platform_device_add_resources(mcb_lpc_pdev, res, 1);
+ if (ret)
+ goto out_put;
+
+ ret = platform_device_add(mcb_lpc_pdev);
+ if (ret)
+ goto out_put;
+
+ return 0;
+
+out_put:
+ platform_device_put(mcb_lpc_pdev);
+ return ret;
+}
+
+static struct resource sc24_fpga_resource = {
+ .start = 0xe000e000,
+ .end = 0xe000e000 + CHAM_HEADER_SIZE,
+ .flags = IORESOURCE_MEM,
+};
+
+static struct platform_driver mcb_lpc_driver = {
+ .driver = {
+ .name = "mcb-lpc",
+ },
+ .probe = mcb_lpc_probe,
+ .remove = mcb_lpc_remove,
+};
+
+static const struct dmi_system_id mcb_lpc_dmi_table[] = {
+ {
+ .ident = "SC24",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MEN"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "14SC24"),
+ },
+ .driver_data = (void *)&sc24_fpga_resource,
+ .callback = mcb_lpc_create_platform_device,
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(dmi, mcb_lpc_dmi_table);
+
+static int __init mcb_lpc_init(void)
+{
+ if (!dmi_check_system(mcb_lpc_dmi_table))
+ return -ENODEV;
+
+ return platform_driver_register(&mcb_lpc_driver);
+}
+
+static void __exit mcb_lpc_exit(void)
+{
+ platform_device_unregister(mcb_lpc_pdev);
+ platform_driver_unregister(&mcb_lpc_driver);
+}
+
+module_init(mcb_lpc_init);
+module_exit(mcb_lpc_exit);
+
+MODULE_AUTHOR("Andreas Werner <andreas.werner@men.de>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MCB over LPC support");
diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c
index dbecbed..4ca2739 100644
--- a/drivers/mcb/mcb-parse.c
+++ b/drivers/mcb/mcb-parse.c
@@ -26,19 +26,20 @@
}
static int chameleon_parse_bdd(struct mcb_bus *bus,
- phys_addr_t mapbase,
+ struct chameleon_bar *cb,
void __iomem *base)
{
return 0;
}
static int chameleon_parse_gdd(struct mcb_bus *bus,
- phys_addr_t mapbase,
- void __iomem *base)
+ struct chameleon_bar *cb,
+ void __iomem *base, int bar_count)
{
struct chameleon_gdd __iomem *gdd =
(struct chameleon_gdd __iomem *) base;
struct mcb_device *mdev;
+ u32 dev_mapbase;
u32 offset;
u32 size;
int ret;
@@ -61,13 +62,39 @@
mdev->group = GDD_GRP(reg2);
mdev->inst = GDD_INS(reg2);
+ /*
+ * If the BAR is missing, dev_mapbase is zero, or if the
+ * device is IO mapped we just print a warning and go on with the
+ * next device, instead of completely stop the gdd parser
+ */
+ if (mdev->bar > bar_count - 1) {
+ pr_info("No BAR for 16z%03d\n", mdev->id);
+ ret = 0;
+ goto err;
+ }
+
+ dev_mapbase = cb[mdev->bar].addr;
+ if (!dev_mapbase) {
+ pr_info("BAR not assigned for 16z%03d\n", mdev->id);
+ ret = 0;
+ goto err;
+ }
+
+ if (dev_mapbase & 0x01) {
+ pr_info("IO mapped Device (16z%03d) not yet supported\n",
+ mdev->id);
+ ret = 0;
+ goto err;
+ }
+
pr_debug("Found a 16z%03d\n", mdev->id);
mdev->irq.start = GDD_IRQ(reg1);
mdev->irq.end = GDD_IRQ(reg1);
mdev->irq.flags = IORESOURCE_IRQ;
- mdev->mem.start = mapbase + offset;
+ mdev->mem.start = dev_mapbase + offset;
+
mdev->mem.end = mdev->mem.start + size - 1;
mdev->mem.flags = IORESOURCE_MEM;
@@ -85,13 +112,76 @@
return ret;
}
+static void chameleon_parse_bar(void __iomem *base,
+ struct chameleon_bar *cb, int bar_count)
+{
+ char __iomem *p = base;
+ int i;
+
+ /* skip reg1 */
+ p += sizeof(__le32);
+
+ for (i = 0; i < bar_count; i++) {
+ cb[i].addr = readl(p);
+ cb[i].size = readl(p + 4);
+
+ p += sizeof(struct chameleon_bar);
+ }
+}
+
+static int chameleon_get_bar(char __iomem **base, phys_addr_t mapbase,
+ struct chameleon_bar **cb)
+{
+ struct chameleon_bar *c;
+ int bar_count;
+ __le32 reg;
+ u32 dtype;
+
+ /*
+ * For those devices which are not connected
+ * to the PCI Bus (e.g. LPC) there is a bar
+ * descriptor located directly after the
+ * chameleon header. This header is comparable
+ * to a PCI header.
+ */
+ dtype = get_next_dtype(*base);
+ if (dtype == CHAMELEON_DTYPE_BAR) {
+ reg = readl(*base);
+
+ bar_count = BAR_CNT(reg);
+ if (bar_count <= 0 && bar_count > CHAMELEON_BAR_MAX)
+ return -ENODEV;
+
+ c = kcalloc(bar_count, sizeof(struct chameleon_bar),
+ GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ chameleon_parse_bar(*base, c, bar_count);
+ *base += BAR_DESC_SIZE(bar_count);
+ } else {
+ c = kzalloc(sizeof(struct chameleon_bar), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ bar_count = 1;
+ c->addr = mapbase;
+ }
+
+ *cb = c;
+
+ return bar_count;
+}
+
int chameleon_parse_cells(struct mcb_bus *bus, phys_addr_t mapbase,
void __iomem *base)
{
- char __iomem *p = base;
struct chameleon_fpga_header *header;
- uint32_t dtype;
+ struct chameleon_bar *cb;
+ char __iomem *p = base;
int num_cells = 0;
+ uint32_t dtype;
+ int bar_count;
int ret = 0;
u32 hsize;
@@ -108,8 +198,8 @@
if (header->magic != CHAMELEONV2_MAGIC) {
pr_err("Unsupported chameleon version 0x%x\n",
header->magic);
- kfree(header);
- return -ENODEV;
+ ret = -ENODEV;
+ goto free_header;
}
p += hsize;
@@ -119,16 +209,20 @@
snprintf(bus->name, CHAMELEON_FILENAME_LEN + 1, "%s",
header->filename);
+ bar_count = chameleon_get_bar(&p, mapbase, &cb);
+ if (bar_count < 0)
+ goto free_header;
+
for_each_chameleon_cell(dtype, p) {
switch (dtype) {
case CHAMELEON_DTYPE_GENERAL:
- ret = chameleon_parse_gdd(bus, mapbase, p);
+ ret = chameleon_parse_gdd(bus, cb, p, bar_count);
if (ret < 0)
- goto out;
+ goto free_bar;
p += sizeof(struct chameleon_gdd);
break;
case CHAMELEON_DTYPE_BRIDGE:
- chameleon_parse_bdd(bus, mapbase, p);
+ chameleon_parse_bdd(bus, cb, p);
p += sizeof(struct chameleon_bdd);
break;
case CHAMELEON_DTYPE_END:
@@ -136,8 +230,8 @@
default:
pr_err("Invalid chameleon descriptor type 0x%x\n",
dtype);
- kfree(header);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_bar;
}
num_cells++;
}
@@ -145,11 +239,15 @@
if (num_cells == 0)
num_cells = -EINVAL;
+ kfree(cb);
kfree(header);
return num_cells;
-out:
+free_bar:
+ kfree(cb);
+free_header:
kfree(header);
+
return ret;
}
EXPORT_SYMBOL_GPL(chameleon_parse_cells);
diff --git a/drivers/mcb/mcb-pci.c b/drivers/mcb/mcb-pci.c
index b15a034..af4d2f2 100644
--- a/drivers/mcb/mcb-pci.c
+++ b/drivers/mcb/mcb-pci.c
@@ -46,6 +46,7 @@
dev_err(&pdev->dev, "Failed to enable PCI device\n");
return -ENODEV;
}
+ pci_set_master(pdev);
priv->mapbase = pci_resource_start(pdev, 0);
if (!priv->mapbase) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ee7fc37..5287e79 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6349,22 +6349,20 @@
return 0;
}
+static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
+
+ free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+ return 0;
+}
+
static void raid5_free_percpu(struct r5conf *conf)
{
- unsigned long cpu;
-
if (!conf->percpu)
return;
-#ifdef CONFIG_HOTPLUG_CPU
- unregister_cpu_notifier(&conf->cpu_notify);
-#endif
-
- get_online_cpus();
- for_each_possible_cpu(cpu)
- free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
- put_online_cpus();
-
+ cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
free_percpu(conf->percpu);
}
@@ -6383,64 +6381,28 @@
kfree(conf);
}
-#ifdef CONFIG_HOTPLUG_CPU
-static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
{
- struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
- long cpu = (long)hcpu;
+ struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- if (alloc_scratch_buffer(conf, percpu)) {
- pr_err("%s: failed memory allocation for cpu%ld\n",
- __func__, cpu);
- return notifier_from_errno(-ENOMEM);
- }
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
- break;
- default:
- break;
+ if (alloc_scratch_buffer(conf, percpu)) {
+ pr_err("%s: failed memory allocation for cpu%u\n",
+ __func__, cpu);
+ return -ENOMEM;
}
- return NOTIFY_OK;
+ return 0;
}
-#endif
static int raid5_alloc_percpu(struct r5conf *conf)
{
- unsigned long cpu;
int err = 0;
conf->percpu = alloc_percpu(struct raid5_percpu);
if (!conf->percpu)
return -ENOMEM;
-#ifdef CONFIG_HOTPLUG_CPU
- conf->cpu_notify.notifier_call = raid456_cpu_notify;
- conf->cpu_notify.priority = 0;
- err = register_cpu_notifier(&conf->cpu_notify);
- if (err)
- return err;
-#endif
-
- get_online_cpus();
- for_each_present_cpu(cpu) {
- err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
- if (err) {
- pr_err("%s: failed memory allocation for cpu%ld\n",
- __func__, cpu);
- break;
- }
- }
- put_online_cpus();
-
+ err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
if (!err) {
conf->scribble_disks = max(conf->raid_disks,
conf->previous_raid_disks);
@@ -7985,10 +7947,21 @@
static int __init raid5_init(void)
{
+ int ret;
+
raid5_wq = alloc_workqueue("raid5wq",
WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
if (!raid5_wq)
return -ENOMEM;
+
+ ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
+ "md/raid5:prepare",
+ raid456_cpu_up_prepare,
+ raid456_cpu_dead);
+ if (ret) {
+ destroy_workqueue(raid5_wq);
+ return ret;
+ }
register_md_personality(&raid6_personality);
register_md_personality(&raid5_personality);
register_md_personality(&raid4_personality);
@@ -8000,6 +7973,7 @@
unregister_md_personality(&raid6_personality);
unregister_md_personality(&raid5_personality);
unregister_md_personality(&raid4_personality);
+ cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
destroy_workqueue(raid5_wq);
}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 517d4b6..57ec49f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -512,9 +512,7 @@
} __percpu *percpu;
int scribble_disks;
int scribble_sectors;
-#ifdef CONFIG_HOTPLUG_CPU
- struct notifier_block cpu_notify;
-#endif
+ struct hlist_node node;
/*
* Free stripes pool
diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c
index 7001824..5719b99 100644
--- a/drivers/media/cec-edid.c
+++ b/drivers/media/cec-edid.c
@@ -70,7 +70,10 @@
u8 tag = edid[i] >> 5;
u8 len = edid[i] & 0x1f;
- if (tag == 3 && len >= 5 && i + len <= end)
+ if (tag == 3 && len >= 5 && i + len <= end &&
+ edid[i + 1] == 0x03 &&
+ edid[i + 2] == 0x0c &&
+ edid[i + 3] == 0x00)
return i + 4;
i += len + 1;
} while (i < end);
diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c
index efec2d1..4d080da 100644
--- a/drivers/media/pci/cx23885/cx23885-417.c
+++ b/drivers/media/pci/cx23885/cx23885-417.c
@@ -1552,6 +1552,7 @@
q->mem_ops = &vb2_dma_sg_memops;
q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
q->lock = &dev->lock;
+ q->dev = &dev->pci->dev;
err = vb2_queue_init(q);
if (err < 0)
diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c
index db987e5..59a4b5f 100644
--- a/drivers/media/pci/saa7134/saa7134-dvb.c
+++ b/drivers/media/pci/saa7134/saa7134-dvb.c
@@ -1238,6 +1238,7 @@
q->buf_struct_size = sizeof(struct saa7134_buf);
q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
q->lock = &dev->lock;
+ q->dev = &dev->pci->dev;
ret = vb2_queue_init(q);
if (ret) {
vb2_dvb_dealloc_frontends(&dev->frontends);
diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c
index ca417a4..791a516 100644
--- a/drivers/media/pci/saa7134/saa7134-empress.c
+++ b/drivers/media/pci/saa7134/saa7134-empress.c
@@ -295,6 +295,7 @@
q->buf_struct_size = sizeof(struct saa7134_buf);
q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
q->lock = &dev->lock;
+ q->dev = &dev->pci->dev;
err = vb2_queue_init(q);
if (err)
return err;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index f25344b..552b635 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -169,7 +169,7 @@
config VIDEO_MEDIATEK_VCODEC
tristate "Mediatek Video Codec driver"
depends on MTK_IOMMU || COMPILE_TEST
- depends on VIDEO_DEV && VIDEO_V4L2
+ depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA
depends on ARCH_MEDIATEK || COMPILE_TEST
select VIDEOBUF2_DMA_CONTIG
select V4L2_MEM2MEM_DEV
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
index 94f0a42..3a8e695 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
@@ -23,7 +23,6 @@
#include <media/v4l2-ioctl.h>
#include <media/videobuf2-core.h>
-#include "mtk_vcodec_util.h"
#define MTK_VCODEC_DRV_NAME "mtk_vcodec_drv"
#define MTK_VCODEC_ENC_NAME "mtk-vcodec-enc"
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
index 3ed3f2d..2c5719a 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
@@ -487,7 +487,6 @@
struct mtk_q_data *q_data;
int ret, i;
struct mtk_video_fmt *fmt;
- unsigned int pitch_w_div16;
struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type);
@@ -530,15 +529,6 @@
q_data->coded_width = f->fmt.pix_mp.width;
q_data->coded_height = f->fmt.pix_mp.height;
- pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16);
- if (pitch_w_div16 % 8 != 0) {
- /* Adjust returned width/height, so application could correctly
- * allocate hw required memory
- */
- q_data->visible_height += 32;
- vidioc_try_fmt(f, q_data->fmt);
- }
-
q_data->field = f->fmt.pix_mp.field;
ctx->colorspace = f->fmt.pix_mp.colorspace;
ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
@@ -878,7 +868,8 @@
{
struct mtk_vcodec_ctx *ctx = priv;
int ret;
- struct vb2_buffer *dst_buf;
+ struct vb2_buffer *src_buf, *dst_buf;
+ struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
struct mtk_vcodec_mem bs_buf;
struct venc_done_result enc_result;
@@ -911,6 +902,15 @@
mtk_v4l2_err("venc_if_encode failed=%d", ret);
return -EINVAL;
}
+ src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
+ if (src_buf) {
+ src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+ dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+ dst_buf->timestamp = src_buf->timestamp;
+ dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+ } else {
+ mtk_v4l2_err("No timestamp for the header buffer.");
+ }
ctx->state = MTK_STATE_HEADER;
dst_buf->planes[0].bytesused = enc_result.bs_size;
@@ -1003,7 +1003,7 @@
struct mtk_vcodec_mem bs_buf;
struct venc_done_result enc_result;
int ret, i;
- struct vb2_v4l2_buffer *vb2_v4l2;
+ struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
/* check dst_buf, dst_buf may be removed in device_run
* to stored encdoe header so we need check dst_buf and
@@ -1043,9 +1043,14 @@
ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME,
&frm_buf, &bs_buf, &enc_result);
- vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf);
+ src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+ dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+
+ dst_buf->timestamp = src_buf->timestamp;
+ dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+
if (enc_result.is_key_frm)
- vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
+ dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
if (ret) {
v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf),
@@ -1217,7 +1222,7 @@
0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE);
v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE,
V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
- 0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN);
+ 0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL,
V4L2_MPEG_VIDEO_H264_LEVEL_4_2,
0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
@@ -1288,5 +1293,10 @@
void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx)
{
- venc_if_deinit(ctx);
+ int ret = venc_if_deinit(ctx);
+
+ if (ret)
+ mtk_v4l2_err("venc_if_deinit failed=%d", ret);
+
+ ctx->state = MTK_STATE_FREE;
}
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
index c7806ec..5cd2151 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
@@ -218,11 +218,15 @@
mtk_v4l2_debug(1, "[%d] encoder", ctx->id);
mutex_lock(&dev->dev_mutex);
+ /*
+ * Call v4l2_m2m_ctx_release to make sure the worker thread is not
+ * running after venc_if_deinit.
+ */
+ v4l2_m2m_ctx_release(ctx->m2m_ctx);
mtk_vcodec_enc_release(ctx);
v4l2_fh_del(&ctx->fh);
v4l2_fh_exit(&ctx->fh);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
- v4l2_m2m_ctx_release(ctx->m2m_ctx);
list_del_init(&ctx->list);
dev->num_instances--;
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
index 33e890f..1213185 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
@@ -16,7 +16,6 @@
#define _MTK_VCODEC_INTR_H_
#define MTK_INST_IRQ_RECEIVED 0x1
-#define MTK_INST_WORK_THREAD_ABORT_DONE 0x2
struct mtk_vcodec_ctx;
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
index 9a60052..63d4be4 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
@@ -61,6 +61,8 @@
/*
* struct venc_h264_vpu_config - Structure for h264 encoder configuration
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* @input_fourcc: input fourcc
* @bitrate: target bitrate (in bps)
* @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -94,13 +96,13 @@
/*
* struct venc_h264_vpu_buf - Structure for buffer information
- * @align: buffer alignment (in bytes)
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* @iova: IO virtual address
* @vpua: VPU side memory addr which is used by RC_CODE
* @size: buffer size (in bytes)
*/
struct venc_h264_vpu_buf {
- u32 align;
u32 iova;
u32 vpua;
u32 size;
@@ -108,6 +110,8 @@
/*
* struct venc_h264_vsi - Structure for VPU driver control and info share
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* This structure is allocated in VPU side and shared to AP side.
* @config: h264 encoder configuration
* @work_bufs: working buffer information in VPU side
@@ -150,12 +154,6 @@
struct mtk_vcodec_ctx *ctx;
};
-static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr,
- u32 val)
-{
- writel(val, inst->hw_base + addr);
-}
-
static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr)
{
return readl(inst->hw_base + addr);
@@ -214,6 +212,8 @@
return 40;
case V4L2_MPEG_VIDEO_H264_LEVEL_4_1:
return 41;
+ case V4L2_MPEG_VIDEO_H264_LEVEL_4_2:
+ return 42;
default:
mtk_vcodec_debug(inst, "unsupported level %d", level);
return 31;
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
index 60bbcd2..6d97584 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
@@ -56,6 +56,8 @@
/*
* struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* @input_fourcc: input fourcc
* @bitrate: target bitrate (in bps)
* @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -83,14 +85,14 @@
};
/*
- * struct venc_vp8_vpu_buf -Structure for buffer information
- * @align: buffer alignment (in bytes)
+ * struct venc_vp8_vpu_buf - Structure for buffer information
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* @iova: IO virtual address
* @vpua: VPU side memory addr which is used by RC_CODE
* @size: buffer size (in bytes)
*/
struct venc_vp8_vpu_buf {
- u32 align;
u32 iova;
u32 vpua;
u32 size;
@@ -98,6 +100,8 @@
/*
* struct venc_vp8_vsi - Structure for VPU driver control and info share
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
* This structure is allocated in VPU side and shared to AP side.
* @config: vp8 encoder configuration
* @work_bufs: working buffer information in VPU side
@@ -138,12 +142,6 @@
struct mtk_vcodec_ctx *ctx;
};
-static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr,
- u32 val)
-{
- writel(val, inst->hw_base + addr);
-}
-
static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
{
return readl(inst->hw_base + addr);
diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c
index 6a7bcc3..bc50c69 100644
--- a/drivers/media/platform/rcar-fcp.c
+++ b/drivers/media/platform/rcar-fcp.c
@@ -99,10 +99,16 @@
*/
int rcar_fcp_enable(struct rcar_fcp_device *fcp)
{
+ int error;
+
if (!fcp)
return 0;
- return pm_runtime_get_sync(fcp->dev);
+ error = pm_runtime_get_sync(fcp->dev);
+ if (error < 0)
+ return error;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(rcar_fcp_enable);
diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c
index 9daf94b..568f05e 100644
--- a/drivers/memory/of_memory.c
+++ b/drivers/memory/of_memory.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <memory/jedec_ddr.h>
#include <linux/export.h>
+#include "of_memory.h"
/**
* of_get_min_tck() - extract min timing values for ddr
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index bd3aa45..c8dee47 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -984,6 +984,10 @@
int ret;
struct resource *res;
+ /* If we have ACPI based watchdog use that instead */
+ if (acpi_has_watchdog())
+ return -ENODEV;
+
/* Setup power management base register */
pci_read_config_dword(dev, priv->abase, &base_addr_cfg);
base_addr = base_addr_cfg & 0x0000ff80;
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d002528..64971ba 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -429,34 +429,6 @@
line and the Linux version on the second line, but that's
still useful.
-config BMP085
- tristate
- depends on SYSFS
-
-config BMP085_I2C
- tristate "BMP085 digital pressure sensor on I2C"
- select BMP085
- select REGMAP_I2C
- depends on I2C && SYSFS
- help
- Say Y here if you want to support Bosch Sensortec's digital pressure
- sensor hooked to an I2C bus.
-
- To compile this driver as a module, choose M here: the
- module will be called bmp085-i2c.
-
-config BMP085_SPI
- tristate "BMP085 digital pressure sensor on SPI"
- select BMP085
- select REGMAP_SPI
- depends on SPI_MASTER && SYSFS
- help
- Say Y here if you want to support Bosch Sensortec's digital pressure
- sensor hooked to an SPI bus.
-
- To compile this driver as a module, choose M here: the
- module will be called bmp085-spi.
-
config PCH_PHUB
tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
select GENERIC_NET_UTILS
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index fb32516..3198336 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -9,9 +9,6 @@
obj-$(CONFIG_INTEL_MID_PTI) += pti.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
-obj-$(CONFIG_BMP085) += bmp085.o
-obj-$(CONFIG_BMP085_I2C) += bmp085-i2c.o
-obj-$(CONFIG_BMP085_SPI) += bmp085-spi.o
obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o
obj-$(CONFIG_ICS932S401) += ics932s401.o
obj-$(CONFIG_LKDTM) += lkdtm.o
diff --git a/drivers/misc/bmp085-i2c.c b/drivers/misc/bmp085-i2c.c
deleted file mode 100644
index f35c218..0000000
--- a/drivers/misc/bmp085-i2c.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2012 Bosch Sensortec GmbH
- * Copyright (c) 2012 Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/i2c.h>
-#include <linux/err.h>
-#include "bmp085.h"
-
-#define BMP085_I2C_ADDRESS 0x77
-
-static const unsigned short normal_i2c[] = { BMP085_I2C_ADDRESS,
- I2C_CLIENT_END };
-
-static int bmp085_i2c_detect(struct i2c_client *client,
- struct i2c_board_info *info)
-{
- if (client->addr != BMP085_I2C_ADDRESS)
- return -ENODEV;
-
- return bmp085_detect(&client->dev);
-}
-
-static int bmp085_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
-{
- int err;
- struct regmap *regmap = devm_regmap_init_i2c(client,
- &bmp085_regmap_config);
-
- if (IS_ERR(regmap)) {
- err = PTR_ERR(regmap);
- dev_err(&client->dev, "Failed to init regmap: %d\n", err);
- return err;
- }
-
- return bmp085_probe(&client->dev, regmap, client->irq);
-}
-
-static int bmp085_i2c_remove(struct i2c_client *client)
-{
- return bmp085_remove(&client->dev);
-}
-
-static const struct i2c_device_id bmp085_id[] = {
- { BMP085_NAME, 0 },
- { "bmp180", 0 },
- { }
-};
-MODULE_DEVICE_TABLE(i2c, bmp085_id);
-
-static struct i2c_driver bmp085_i2c_driver = {
- .driver = {
- .name = BMP085_NAME,
- },
- .id_table = bmp085_id,
- .probe = bmp085_i2c_probe,
- .remove = bmp085_i2c_remove,
-
- .detect = bmp085_i2c_detect,
- .address_list = normal_i2c
-};
-
-module_i2c_driver(bmp085_i2c_driver);
-
-MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
-MODULE_DESCRIPTION("BMP085 I2C bus driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085-spi.c b/drivers/misc/bmp085-spi.c
deleted file mode 100644
index 17ecbf9..0000000
--- a/drivers/misc/bmp085-spi.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2012 Bosch Sensortec GmbH
- * Copyright (c) 2012 Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/spi/spi.h>
-#include <linux/err.h>
-#include "bmp085.h"
-
-static int bmp085_spi_probe(struct spi_device *client)
-{
- int err;
- struct regmap *regmap;
-
- client->bits_per_word = 8;
- err = spi_setup(client);
- if (err < 0) {
- dev_err(&client->dev, "spi_setup failed!\n");
- return err;
- }
-
- regmap = devm_regmap_init_spi(client, &bmp085_regmap_config);
- if (IS_ERR(regmap)) {
- err = PTR_ERR(regmap);
- dev_err(&client->dev, "Failed to init regmap: %d\n", err);
- return err;
- }
-
- return bmp085_probe(&client->dev, regmap, client->irq);
-}
-
-static int bmp085_spi_remove(struct spi_device *client)
-{
- return bmp085_remove(&client->dev);
-}
-
-static const struct of_device_id bmp085_of_match[] = {
- { .compatible = "bosch,bmp085", },
- { },
-};
-MODULE_DEVICE_TABLE(of, bmp085_of_match);
-
-static const struct spi_device_id bmp085_id[] = {
- { "bmp180", 0 },
- { "bmp181", 0 },
- { }
-};
-MODULE_DEVICE_TABLE(spi, bmp085_id);
-
-static struct spi_driver bmp085_spi_driver = {
- .driver = {
- .name = BMP085_NAME,
- .of_match_table = bmp085_of_match
- },
- .id_table = bmp085_id,
- .probe = bmp085_spi_probe,
- .remove = bmp085_spi_remove
-};
-
-module_spi_driver(bmp085_spi_driver);
-
-MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
-MODULE_DESCRIPTION("BMP085 SPI bus driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
deleted file mode 100644
index 9b313f7..0000000
--- a/drivers/misc/bmp085.c
+++ /dev/null
@@ -1,506 +0,0 @@
-/* Copyright (c) 2010 Christoph Mair <christoph.mair@gmail.com>
- * Copyright (c) 2012 Bosch Sensortec GmbH
- * Copyright (c) 2012 Unixphere AB
- *
- * This driver supports the bmp085 and bmp18x digital barometric pressure
- * and temperature sensors from Bosch Sensortec. The datasheets
- * are available from their website:
- * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP085-DS000-05.pdf
- * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP180-DS000-07.pdf
- *
- * A pressure measurement is issued by reading from pressure0_input.
- * The return value ranges from 30000 to 110000 pascal with a resulution
- * of 1 pascal (0.01 millibar) which enables measurements from 9000m above
- * to 500m below sea level.
- *
- * The temperature can be read from temp0_input. Values range from
- * -400 to 850 representing the ambient temperature in degree celsius
- * multiplied by 10.The resolution is 0.1 celsius.
- *
- * Because ambient pressure is temperature dependent, a temperature
- * measurement will be executed automatically even if the user is reading
- * from pressure0_input. This happens if the last temperature measurement
- * has been executed more then one second ago.
- *
- * To decrease RMS noise from pressure measurements, the bmp085 can
- * autonomously calculate the average of up to eight samples. This is
- * set up by writing to the oversampling sysfs file. Accepted values
- * are 0, 1, 2 and 3. 2^x when x is the value written to this file
- * specifies the number of samples used to calculate the ambient pressure.
- * RMS noise is specified with six pascal (without averaging) and decreases
- * down to 3 pascal when using an oversampling setting of 3.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include "bmp085.h"
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/gpio.h>
-
-#define BMP085_CHIP_ID 0x55
-#define BMP085_CALIBRATION_DATA_START 0xAA
-#define BMP085_CALIBRATION_DATA_LENGTH 11 /* 16 bit values */
-#define BMP085_CHIP_ID_REG 0xD0
-#define BMP085_CTRL_REG 0xF4
-#define BMP085_TEMP_MEASUREMENT 0x2E
-#define BMP085_PRESSURE_MEASUREMENT 0x34
-#define BMP085_CONVERSION_REGISTER_MSB 0xF6
-#define BMP085_CONVERSION_REGISTER_LSB 0xF7
-#define BMP085_CONVERSION_REGISTER_XLSB 0xF8
-#define BMP085_TEMP_CONVERSION_TIME 5
-
-struct bmp085_calibration_data {
- s16 AC1, AC2, AC3;
- u16 AC4, AC5, AC6;
- s16 B1, B2;
- s16 MB, MC, MD;
-};
-
-struct bmp085_data {
- struct device *dev;
- struct regmap *regmap;
- struct mutex lock;
- struct bmp085_calibration_data calibration;
- u8 oversampling_setting;
- u32 raw_temperature;
- u32 raw_pressure;
- u32 temp_measurement_period;
- unsigned long last_temp_measurement;
- u8 chip_id;
- s32 b6; /* calculated temperature correction coefficient */
- int irq;
- struct completion done;
-};
-
-static irqreturn_t bmp085_eoc_isr(int irq, void *devid)
-{
- struct bmp085_data *data = devid;
-
- complete(&data->done);
-
- return IRQ_HANDLED;
-}
-
-static s32 bmp085_read_calibration_data(struct bmp085_data *data)
-{
- u16 tmp[BMP085_CALIBRATION_DATA_LENGTH];
- struct bmp085_calibration_data *cali = &(data->calibration);
- s32 status = regmap_bulk_read(data->regmap,
- BMP085_CALIBRATION_DATA_START, (u8 *)tmp,
- (BMP085_CALIBRATION_DATA_LENGTH << 1));
- if (status < 0)
- return status;
-
- cali->AC1 = be16_to_cpu(tmp[0]);
- cali->AC2 = be16_to_cpu(tmp[1]);
- cali->AC3 = be16_to_cpu(tmp[2]);
- cali->AC4 = be16_to_cpu(tmp[3]);
- cali->AC5 = be16_to_cpu(tmp[4]);
- cali->AC6 = be16_to_cpu(tmp[5]);
- cali->B1 = be16_to_cpu(tmp[6]);
- cali->B2 = be16_to_cpu(tmp[7]);
- cali->MB = be16_to_cpu(tmp[8]);
- cali->MC = be16_to_cpu(tmp[9]);
- cali->MD = be16_to_cpu(tmp[10]);
- return 0;
-}
-
-static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
-{
- u16 tmp;
- s32 status;
-
- mutex_lock(&data->lock);
-
- init_completion(&data->done);
-
- status = regmap_write(data->regmap, BMP085_CTRL_REG,
- BMP085_TEMP_MEASUREMENT);
- if (status < 0) {
- dev_err(data->dev,
- "Error while requesting temperature measurement.\n");
- goto exit;
- }
- wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
- BMP085_TEMP_CONVERSION_TIME));
-
- status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
- &tmp, sizeof(tmp));
- if (status < 0) {
- dev_err(data->dev,
- "Error while reading temperature measurement result\n");
- goto exit;
- }
- data->raw_temperature = be16_to_cpu(tmp);
- data->last_temp_measurement = jiffies;
- status = 0; /* everything ok, return 0 */
-
-exit:
- mutex_unlock(&data->lock);
- return status;
-}
-
-static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
-{
- u32 tmp = 0;
- s32 status;
-
- mutex_lock(&data->lock);
-
- init_completion(&data->done);
-
- status = regmap_write(data->regmap, BMP085_CTRL_REG,
- BMP085_PRESSURE_MEASUREMENT +
- (data->oversampling_setting << 6));
- if (status < 0) {
- dev_err(data->dev,
- "Error while requesting pressure measurement.\n");
- goto exit;
- }
-
- /* wait for the end of conversion */
- wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
- 2+(3 << data->oversampling_setting)));
- /* copy data into a u32 (4 bytes), but skip the first byte. */
- status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
- ((u8 *)&tmp)+1, 3);
- if (status < 0) {
- dev_err(data->dev,
- "Error while reading pressure measurement results\n");
- goto exit;
- }
- data->raw_pressure = be32_to_cpu((tmp));
- data->raw_pressure >>= (8-data->oversampling_setting);
- status = 0; /* everything ok, return 0 */
-
-exit:
- mutex_unlock(&data->lock);
- return status;
-}
-
-/*
- * This function starts the temperature measurement and returns the value
- * in tenth of a degree celsius.
- */
-static s32 bmp085_get_temperature(struct bmp085_data *data, int *temperature)
-{
- struct bmp085_calibration_data *cali = &data->calibration;
- long x1, x2;
- int status;
-
- status = bmp085_update_raw_temperature(data);
- if (status < 0)
- goto exit;
-
- x1 = ((data->raw_temperature - cali->AC6) * cali->AC5) >> 15;
- x2 = (cali->MC << 11) / (x1 + cali->MD);
- data->b6 = x1 + x2 - 4000;
- /* if NULL just update b6. Used for pressure only measurements */
- if (temperature != NULL)
- *temperature = (x1+x2+8) >> 4;
-
-exit:
- return status;
-}
-
-/*
- * This function starts the pressure measurement and returns the value
- * in millibar. Since the pressure depends on the ambient temperature,
- * a temperature measurement is executed according to the given temperature
- * measurement period (default is 1 sec boundary). This period could vary
- * and needs to be adjusted according to the sensor environment, i.e. if big
- * temperature variations then the temperature needs to be read out often.
- */
-static s32 bmp085_get_pressure(struct bmp085_data *data, int *pressure)
-{
- struct bmp085_calibration_data *cali = &data->calibration;
- s32 x1, x2, x3, b3;
- u32 b4, b7;
- s32 p;
- int status;
-
- /* alt least every second force an update of the ambient temperature */
- if ((data->last_temp_measurement == 0) ||
- time_is_before_jiffies(data->last_temp_measurement + 1*HZ)) {
- status = bmp085_get_temperature(data, NULL);
- if (status < 0)
- return status;
- }
-
- status = bmp085_update_raw_pressure(data);
- if (status < 0)
- return status;
-
- x1 = (data->b6 * data->b6) >> 12;
- x1 *= cali->B2;
- x1 >>= 11;
-
- x2 = cali->AC2 * data->b6;
- x2 >>= 11;
-
- x3 = x1 + x2;
-
- b3 = (((((s32)cali->AC1) * 4 + x3) << data->oversampling_setting) + 2);
- b3 >>= 2;
-
- x1 = (cali->AC3 * data->b6) >> 13;
- x2 = (cali->B1 * ((data->b6 * data->b6) >> 12)) >> 16;
- x3 = (x1 + x2 + 2) >> 2;
- b4 = (cali->AC4 * (u32)(x3 + 32768)) >> 15;
-
- b7 = ((u32)data->raw_pressure - b3) *
- (50000 >> data->oversampling_setting);
- p = ((b7 < 0x80000000) ? ((b7 << 1) / b4) : ((b7 / b4) * 2));
-
- x1 = p >> 8;
- x1 *= x1;
- x1 = (x1 * 3038) >> 16;
- x2 = (-7357 * p) >> 16;
- p += (x1 + x2 + 3791) >> 4;
-
- *pressure = p;
-
- return 0;
-}
-
-/*
- * This function sets the chip-internal oversampling. Valid values are 0..3.
- * The chip will use 2^oversampling samples for internal averaging.
- * This influences the measurement time and the accuracy; larger values
- * increase both. The datasheet gives an overview on how measurement time,
- * accuracy and noise correlate.
- */
-static void bmp085_set_oversampling(struct bmp085_data *data,
- unsigned char oversampling)
-{
- if (oversampling > 3)
- oversampling = 3;
- data->oversampling_setting = oversampling;
-}
-
-/*
- * Returns the currently selected oversampling. Range: 0..3
- */
-static unsigned char bmp085_get_oversampling(struct bmp085_data *data)
-{
- return data->oversampling_setting;
-}
-
-/* sysfs callbacks */
-static ssize_t set_oversampling(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct bmp085_data *data = dev_get_drvdata(dev);
- unsigned long oversampling;
- int err = kstrtoul(buf, 10, &oversampling);
-
- if (err == 0) {
- mutex_lock(&data->lock);
- bmp085_set_oversampling(data, oversampling);
- mutex_unlock(&data->lock);
- return count;
- }
-
- return err;
-}
-
-static ssize_t show_oversampling(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct bmp085_data *data = dev_get_drvdata(dev);
-
- return sprintf(buf, "%u\n", bmp085_get_oversampling(data));
-}
-static DEVICE_ATTR(oversampling, S_IWUSR | S_IRUGO,
- show_oversampling, set_oversampling);
-
-
-static ssize_t show_temperature(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- int temperature;
- int status;
- struct bmp085_data *data = dev_get_drvdata(dev);
-
- status = bmp085_get_temperature(data, &temperature);
- if (status < 0)
- return status;
- else
- return sprintf(buf, "%d\n", temperature);
-}
-static DEVICE_ATTR(temp0_input, S_IRUGO, show_temperature, NULL);
-
-
-static ssize_t show_pressure(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- int pressure;
- int status;
- struct bmp085_data *data = dev_get_drvdata(dev);
-
- status = bmp085_get_pressure(data, &pressure);
- if (status < 0)
- return status;
- else
- return sprintf(buf, "%d\n", pressure);
-}
-static DEVICE_ATTR(pressure0_input, S_IRUGO, show_pressure, NULL);
-
-
-static struct attribute *bmp085_attributes[] = {
- &dev_attr_temp0_input.attr,
- &dev_attr_pressure0_input.attr,
- &dev_attr_oversampling.attr,
- NULL
-};
-
-static const struct attribute_group bmp085_attr_group = {
- .attrs = bmp085_attributes,
-};
-
-int bmp085_detect(struct device *dev)
-{
- struct bmp085_data *data = dev_get_drvdata(dev);
- unsigned int id;
- int ret;
-
- ret = regmap_read(data->regmap, BMP085_CHIP_ID_REG, &id);
- if (ret < 0)
- return ret;
-
- if (id != data->chip_id)
- return -ENODEV;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(bmp085_detect);
-
-static void bmp085_get_of_properties(struct bmp085_data *data)
-{
-#ifdef CONFIG_OF
- struct device_node *np = data->dev->of_node;
- u32 prop;
-
- if (!np)
- return;
-
- if (!of_property_read_u32(np, "chip-id", &prop))
- data->chip_id = prop & 0xff;
-
- if (!of_property_read_u32(np, "temp-measurement-period", &prop))
- data->temp_measurement_period = (prop/100)*HZ;
-
- if (!of_property_read_u32(np, "default-oversampling", &prop))
- data->oversampling_setting = prop & 0xff;
-#endif
-}
-
-static int bmp085_init_client(struct bmp085_data *data)
-{
- int status = bmp085_read_calibration_data(data);
-
- if (status < 0)
- return status;
-
- /* default settings */
- data->chip_id = BMP085_CHIP_ID;
- data->last_temp_measurement = 0;
- data->temp_measurement_period = 1*HZ;
- data->oversampling_setting = 3;
-
- bmp085_get_of_properties(data);
-
- mutex_init(&data->lock);
-
- return 0;
-}
-
-struct regmap_config bmp085_regmap_config = {
- .reg_bits = 8,
- .val_bits = 8
-};
-EXPORT_SYMBOL_GPL(bmp085_regmap_config);
-
-int bmp085_probe(struct device *dev, struct regmap *regmap, int irq)
-{
- struct bmp085_data *data;
- int err = 0;
-
- data = kzalloc(sizeof(struct bmp085_data), GFP_KERNEL);
- if (!data) {
- err = -ENOMEM;
- goto exit;
- }
-
- dev_set_drvdata(dev, data);
- data->dev = dev;
- data->regmap = regmap;
- data->irq = irq;
-
- if (data->irq > 0) {
- err = devm_request_irq(dev, data->irq, bmp085_eoc_isr,
- IRQF_TRIGGER_RISING, "bmp085",
- data);
- if (err < 0)
- goto exit_free;
- }
-
- /* Initialize the BMP085 chip */
- err = bmp085_init_client(data);
- if (err < 0)
- goto exit_free;
-
- err = bmp085_detect(dev);
- if (err < 0) {
- dev_err(dev, "%s: chip_id failed!\n", BMP085_NAME);
- goto exit_free;
- }
-
- /* Register sysfs hooks */
- err = sysfs_create_group(&dev->kobj, &bmp085_attr_group);
- if (err)
- goto exit_free;
-
- dev_info(dev, "Successfully initialized %s!\n", BMP085_NAME);
-
- return 0;
-
-exit_free:
- kfree(data);
-exit:
- return err;
-}
-EXPORT_SYMBOL_GPL(bmp085_probe);
-
-int bmp085_remove(struct device *dev)
-{
- struct bmp085_data *data = dev_get_drvdata(dev);
-
- sysfs_remove_group(&data->dev->kobj, &bmp085_attr_group);
- kfree(data);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(bmp085_remove);
-
-MODULE_AUTHOR("Christoph Mair <christoph.mair@gmail.com>");
-MODULE_DESCRIPTION("BMP085 driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.h b/drivers/misc/bmp085.h
deleted file mode 100644
index 8b8e3b1..0000000
--- a/drivers/misc/bmp085.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2012 Bosch Sensortec GmbH
- * Copyright (c) 2012 Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _BMP085_H
-#define _BMP085_H
-
-#include <linux/regmap.h>
-
-#define BMP085_NAME "bmp085"
-
-extern struct regmap_config bmp085_regmap_config;
-
-int bmp085_probe(struct device *dev, struct regmap *regmap, int irq);
-int bmp085_remove(struct device *dev);
-int bmp085_detect(struct device *dev);
-
-#endif
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 2c6c7c8..5afe4cd 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -121,9 +121,8 @@
* this chip is clocked very slowly
*/
status = spi_sync(at25->spi, &m);
- dev_dbg(&at25->spi->dev,
- "read %Zd bytes at %d --> %d\n",
- count, offset, (int) status);
+ dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
+ count, offset, status);
mutex_unlock(&at25->lock);
return status;
@@ -167,8 +166,7 @@
*cp = AT25_WREN;
status = spi_write(at25->spi, cp, 1);
if (status < 0) {
- dev_dbg(&at25->spi->dev, "WREN --> %d\n",
- (int) status);
+ dev_dbg(&at25->spi->dev, "WREN --> %d\n", status);
break;
}
@@ -196,9 +194,8 @@
memcpy(cp, buf, segment);
status = spi_write(at25->spi, bounce,
segment + at25->addrlen + 1);
- dev_dbg(&at25->spi->dev,
- "write %u bytes at %u --> %d\n",
- segment, offset, (int) status);
+ dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n",
+ segment, offset, status);
if (status < 0)
break;
@@ -225,8 +222,7 @@
if ((sr < 0) || (sr & AT25_SR_nRDY)) {
dev_err(&at25->spi->dev,
- "write %d bytes offset %d, "
- "timeout after %u msecs\n",
+ "write %u bytes offset %u, timeout after %u msecs\n",
segment, offset,
jiffies_to_msecs(jiffies -
(timeout - EE_TIMEOUT)));
@@ -368,9 +364,7 @@
return PTR_ERR(at25->nvmem);
dev_info(&spi->dev, "%d %s %s eeprom%s, pagesize %u\n",
- (chip.byte_len < 1024)
- ? chip.byte_len
- : (chip.byte_len / 1024),
+ (chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024),
(chip.byte_len < 1024) ? "Byte" : "KByte",
at25->chip.name,
(chip.flags & EE_READONLY) ? " (readonly)" : "",
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index a70b853..6c1f49a 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -1351,6 +1351,19 @@
};
/**
+ * genwqe_devnode() - Set default access mode for genwqe devices.
+ *
+ * Default mode should be rw for everybody. Do not change default
+ * device name.
+ */
+static char *genwqe_devnode(struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return NULL;
+}
+
+/**
* genwqe_init_module() - Driver registration and initialization
*/
static int __init genwqe_init_module(void)
@@ -1363,6 +1376,8 @@
return -ENOMEM;
}
+ class_genwqe->devnode = genwqe_devnode;
+
debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
if (!debugfs_genwqe) {
rc = -ENOMEM;
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 353ee0c..ddfeefe 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -1048,8 +1048,6 @@
"[%s] **err: could not allocate DDCB **\n", __func__);
return -ENOMEM;
}
- memset(queue->ddcb_vaddr, 0, queue_size);
-
queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
queue->ddcb_max, GFP_KERNEL);
if (!queue->ddcb_req) {
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 222367c..8a679ec 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -220,8 +220,8 @@
if (get_order(size) > MAX_ORDER)
return NULL;
- return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
- GFP_KERNEL);
+ return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+ GFP_KERNEL);
}
void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index d6a901c..fea8ff4 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -688,7 +688,8 @@
static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
{
- int error = -ENOMEM;
+ int bar;
+ unsigned long off;
/* map the memory mapped i/o registers */
hw->mmio_vaddr = pci_iomap(pdev, 1, 0);
@@ -698,7 +699,15 @@
}
/* map the adapter shared memory region */
- hw->ram_vaddr = pci_iomap(pdev, 2, max_ccb * ILOHW_CCB_SZ);
+ if (pdev->subsystem_device == 0x00E4) {
+ bar = 5;
+ /* Last 8k is reserved for CCBs */
+ off = pci_resource_len(pdev, bar) - 0x2000;
+ } else {
+ bar = 2;
+ off = 0;
+ }
+ hw->ram_vaddr = pci_iomap_range(pdev, bar, off, max_ccb * ILOHW_CCB_SZ);
if (hw->ram_vaddr == NULL) {
dev_err(&pdev->dev, "Error mapping shared mem\n");
goto mmio_free;
@@ -717,7 +726,7 @@
mmio_free:
pci_iounmap(pdev, hw->mmio_vaddr);
out:
- return error;
+ return -ENOMEM;
}
static void ilo_remove(struct pci_dev *pdev)
@@ -899,7 +908,7 @@
class_destroy(ilo_class);
}
-MODULE_VERSION("1.4.1");
+MODULE_VERSION("1.5.0");
MODULE_ALIAS(ILO_NAME);
MODULE_DESCRIPTION(ILO_NAME);
MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index a039a5d..7ae89b4 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -47,7 +47,6 @@
void mei_amthif_reset_params(struct mei_device *dev)
{
/* reset iamthif parameters. */
- dev->iamthif_current_cb = NULL;
dev->iamthif_canceled = false;
dev->iamthif_state = MEI_IAMTHIF_IDLE;
dev->iamthif_stall_timer = 0;
@@ -67,8 +66,12 @@
struct mei_cl *cl = &dev->iamthif_cl;
int ret;
- if (mei_cl_is_connected(cl))
- return 0;
+ mutex_lock(&dev->device_lock);
+
+ if (mei_cl_is_connected(cl)) {
+ ret = 0;
+ goto out;
+ }
dev->iamthif_state = MEI_IAMTHIF_IDLE;
@@ -77,179 +80,37 @@
ret = mei_cl_link(cl);
if (ret < 0) {
dev_err(dev->dev, "amthif: failed cl_link %d\n", ret);
- return ret;
+ goto out;
}
ret = mei_cl_connect(cl, me_cl, NULL);
- return ret;
-}
-
-/**
- * mei_amthif_read - read data from AMTHIF client
- *
- * @dev: the device structure
- * @file: pointer to file object
- * @ubuf: pointer to user data in user space
- * @length: data length to read
- * @offset: data read offset
- *
- * Locking: called under "dev->device_lock" lock
- *
- * Return:
- * returned data length on success,
- * zero if no data to read,
- * negative on failure.
- */
-int mei_amthif_read(struct mei_device *dev, struct file *file,
- char __user *ubuf, size_t length, loff_t *offset)
-{
- struct mei_cl *cl = file->private_data;
- struct mei_cl_cb *cb;
- int rets;
- int wait_ret;
-
- dev_dbg(dev->dev, "checking amthif data\n");
- cb = mei_cl_read_cb(cl, file);
-
- /* Check for if we can block or not*/
- if (cb == NULL && file->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
-
- dev_dbg(dev->dev, "waiting for amthif data\n");
- while (cb == NULL) {
- /* unlock the Mutex */
- mutex_unlock(&dev->device_lock);
-
- wait_ret = wait_event_interruptible(cl->rx_wait,
- !list_empty(&cl->rd_completed) ||
- !mei_cl_is_connected(cl));
-
- /* Locking again the Mutex */
- mutex_lock(&dev->device_lock);
-
- if (wait_ret)
- return -ERESTARTSYS;
-
- if (!mei_cl_is_connected(cl)) {
- rets = -EBUSY;
- goto out;
- }
-
- cb = mei_cl_read_cb(cl, file);
- }
-
- if (cb->status) {
- rets = cb->status;
- dev_dbg(dev->dev, "read operation failed %d\n", rets);
- goto free;
- }
-
- dev_dbg(dev->dev, "Got amthif data\n");
- /* if the whole message will fit remove it from the list */
- if (cb->buf_idx >= *offset && length >= (cb->buf_idx - *offset))
- list_del_init(&cb->list);
- else if (cb->buf_idx <= *offset) {
- /* end of the message has been reached */
- list_del_init(&cb->list);
- rets = 0;
- goto free;
- }
- /* else means that not full buffer will be read and do not
- * remove message from deletion list
- */
-
- dev_dbg(dev->dev, "amthif cb->buf.size - %zu cb->buf_idx - %zu\n",
- cb->buf.size, cb->buf_idx);
-
- /* length is being truncated to PAGE_SIZE, however,
- * the buf_idx may point beyond */
- length = min_t(size_t, length, (cb->buf_idx - *offset));
-
- if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
- dev_dbg(dev->dev, "failed to copy data to userland\n");
- rets = -EFAULT;
- } else {
- rets = length;
- if ((*offset + length) < cb->buf_idx) {
- *offset += length;
- goto out;
- }
- }
-free:
- dev_dbg(dev->dev, "free amthif cb memory.\n");
- *offset = 0;
- mei_io_cb_free(cb);
out:
- return rets;
+ mutex_unlock(&dev->device_lock);
+ return ret;
}
/**
* mei_amthif_read_start - queue message for sending read credential
*
* @cl: host client
- * @file: file pointer of message recipient
+ * @fp: file pointer of message recipient
*
* Return: 0 on success, <0 on failure.
*/
-static int mei_amthif_read_start(struct mei_cl *cl, const struct file *file)
+static int mei_amthif_read_start(struct mei_cl *cl, const struct file *fp)
{
struct mei_device *dev = cl->dev;
struct mei_cl_cb *cb;
- int rets;
- cb = mei_io_cb_init(cl, MEI_FOP_READ, file);
- if (!cb) {
- rets = -ENOMEM;
- goto err;
- }
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, fp);
+ if (!cb)
+ return -ENOMEM;
- rets = mei_io_cb_alloc_buf(cb, mei_cl_mtu(cl));
- if (rets)
- goto err;
-
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ cl->rx_flow_ctrl_creds++;
dev->iamthif_state = MEI_IAMTHIF_READING;
- dev->iamthif_fp = cb->fp;
- dev->iamthif_current_cb = cb;
-
- return 0;
-err:
- mei_io_cb_free(cb);
- return rets;
-}
-
-/**
- * mei_amthif_send_cmd - send amthif command to the ME
- *
- * @cl: the host client
- * @cb: mei call back struct
- *
- * Return: 0 on success, <0 on failure.
- */
-static int mei_amthif_send_cmd(struct mei_cl *cl, struct mei_cl_cb *cb)
-{
- struct mei_device *dev;
- int ret;
-
- if (!cl->dev || !cb)
- return -ENODEV;
-
- dev = cl->dev;
-
- dev->iamthif_state = MEI_IAMTHIF_WRITING;
- dev->iamthif_current_cb = cb;
- dev->iamthif_fp = cb->fp;
- dev->iamthif_canceled = false;
-
- ret = mei_cl_write(cl, cb, false);
- if (ret < 0)
- return ret;
-
- if (cb->completed)
- cb->status = mei_amthif_read_start(cl, cb->fp);
+ cl->fp = cb->fp;
return 0;
}
@@ -265,20 +126,32 @@
{
struct mei_cl *cl = &dev->iamthif_cl;
struct mei_cl_cb *cb;
+ int ret;
dev->iamthif_canceled = false;
- dev->iamthif_state = MEI_IAMTHIF_IDLE;
- dev->iamthif_fp = NULL;
dev_dbg(dev->dev, "complete amthif cmd_list cb.\n");
cb = list_first_entry_or_null(&dev->amthif_cmd_list.list,
typeof(*cb), list);
- if (!cb)
+ if (!cb) {
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ cl->fp = NULL;
return 0;
+ }
list_del_init(&cb->list);
- return mei_amthif_send_cmd(cl, cb);
+ dev->iamthif_state = MEI_IAMTHIF_WRITING;
+ cl->fp = cb->fp;
+
+ ret = mei_cl_write(cl, cb, false);
+ if (ret < 0)
+ return ret;
+
+ if (cb->completed)
+ cb->status = mei_amthif_read_start(cl, cb->fp);
+
+ return 0;
}
/**
@@ -299,8 +172,7 @@
/*
* The previous request is still in processing, queue this one.
*/
- if (dev->iamthif_state > MEI_IAMTHIF_IDLE &&
- dev->iamthif_state < MEI_IAMTHIF_READ_COMPLETE)
+ if (dev->iamthif_state != MEI_IAMTHIF_IDLE)
return 0;
return mei_amthif_run_next_cmd(dev);
@@ -309,7 +181,6 @@
/**
* mei_amthif_poll - the amthif poll function
*
- * @dev: the device structure
* @file: pointer to file structure
* @wait: pointer to poll_table structure
*
@@ -317,26 +188,19 @@
*
* Locking: called under "dev->device_lock" lock
*/
-
-unsigned int mei_amthif_poll(struct mei_device *dev,
- struct file *file, poll_table *wait)
+unsigned int mei_amthif_poll(struct file *file, poll_table *wait)
{
+ struct mei_cl *cl = file->private_data;
+ struct mei_cl_cb *cb = mei_cl_read_cb(cl, file);
unsigned int mask = 0;
- poll_wait(file, &dev->iamthif_cl.rx_wait, wait);
-
- if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE &&
- dev->iamthif_fp == file) {
-
+ poll_wait(file, &cl->rx_wait, wait);
+ if (cb)
mask |= POLLIN | POLLRDNORM;
- mei_amthif_run_next_cmd(dev);
- }
return mask;
}
-
-
/**
* mei_amthif_irq_write - write iamthif command in irq thread context.
*
@@ -393,7 +257,6 @@
return 0;
dev_dbg(dev->dev, "completed amthif read.\n ");
- dev->iamthif_current_cb = NULL;
dev->iamthif_stall_timer = 0;
return 0;
@@ -409,115 +272,63 @@
{
struct mei_device *dev = cl->dev;
- if (cb->fop_type == MEI_FOP_WRITE) {
+ dev_dbg(dev->dev, "completing amthif call back.\n");
+ switch (cb->fop_type) {
+ case MEI_FOP_WRITE:
if (!cb->status) {
dev->iamthif_stall_timer = MEI_IAMTHIF_STALL_TIMER;
+ mei_schedule_stall_timer(dev);
mei_io_cb_free(cb);
return;
}
- /*
- * in case of error enqueue the write cb to complete read list
- * so it can be propagated to the reader
- */
- list_add_tail(&cb->list, &cl->rd_completed);
- wake_up_interruptible(&cl->rx_wait);
- return;
- }
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ cl->fp = NULL;
+ if (!dev->iamthif_canceled) {
+ /*
+ * in case of error enqueue the write cb to complete
+ * read list so it can be propagated to the reader
+ */
+ list_add_tail(&cb->list, &cl->rd_completed);
+ wake_up_interruptible(&cl->rx_wait);
+ } else {
+ mei_io_cb_free(cb);
+ }
+ break;
+ case MEI_FOP_READ:
+ if (!dev->iamthif_canceled) {
+ list_add_tail(&cb->list, &cl->rd_completed);
+ dev_dbg(dev->dev, "amthif read completed\n");
+ wake_up_interruptible(&cl->rx_wait);
+ } else {
+ mei_io_cb_free(cb);
+ }
- if (!dev->iamthif_canceled) {
- dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
dev->iamthif_stall_timer = 0;
- list_add_tail(&cb->list, &cl->rd_completed);
- dev_dbg(dev->dev, "amthif read completed\n");
- } else {
mei_amthif_run_next_cmd(dev);
+ break;
+ default:
+ WARN_ON(1);
}
-
- dev_dbg(dev->dev, "completing amthif call back.\n");
- wake_up_interruptible(&cl->rx_wait);
}
/**
* mei_clear_list - removes all callbacks associated with file
* from mei_cb_list
*
- * @dev: device structure.
* @file: file structure
* @mei_cb_list: callbacks list
*
* mei_clear_list is called to clear resources associated with file
* when application calls close function or Ctrl-C was pressed
- *
- * Return: true if callback removed from the list, false otherwise
*/
-static bool mei_clear_list(struct mei_device *dev,
- const struct file *file, struct list_head *mei_cb_list)
+static void mei_clear_list(const struct file *file,
+ struct list_head *mei_cb_list)
{
- struct mei_cl *cl = &dev->iamthif_cl;
struct mei_cl_cb *cb, *next;
- bool removed = false;
- /* list all list member */
- list_for_each_entry_safe(cb, next, mei_cb_list, list) {
- /* check if list member associated with a file */
- if (file == cb->fp) {
- /* check if cb equal to current iamthif cb */
- if (dev->iamthif_current_cb == cb) {
- dev->iamthif_current_cb = NULL;
- /* send flow control to iamthif client */
- mei_hbm_cl_flow_control_req(dev, cl);
- }
- /* free all allocated buffers */
+ list_for_each_entry_safe(cb, next, mei_cb_list, list)
+ if (file == cb->fp)
mei_io_cb_free(cb);
- removed = true;
- }
- }
- return removed;
-}
-
-/**
- * mei_clear_lists - removes all callbacks associated with file
- *
- * @dev: device structure
- * @file: file structure
- *
- * mei_clear_lists is called to clear resources associated with file
- * when application calls close function or Ctrl-C was pressed
- *
- * Return: true if callback removed from the list, false otherwise
- */
-static bool mei_clear_lists(struct mei_device *dev, const struct file *file)
-{
- bool removed = false;
- struct mei_cl *cl = &dev->iamthif_cl;
-
- /* remove callbacks associated with a file */
- mei_clear_list(dev, file, &dev->amthif_cmd_list.list);
- if (mei_clear_list(dev, file, &cl->rd_completed))
- removed = true;
-
- mei_clear_list(dev, file, &dev->ctrl_rd_list.list);
-
- if (mei_clear_list(dev, file, &dev->ctrl_wr_list.list))
- removed = true;
-
- if (mei_clear_list(dev, file, &dev->write_waiting_list.list))
- removed = true;
-
- if (mei_clear_list(dev, file, &dev->write_list.list))
- removed = true;
-
- /* check if iamthif_current_cb not NULL */
- if (dev->iamthif_current_cb && !removed) {
- /* check file and iamthif current cb association */
- if (dev->iamthif_current_cb->fp == file) {
- /* remove cb */
- mei_io_cb_free(dev->iamthif_current_cb);
- dev->iamthif_current_cb = NULL;
- removed = true;
- }
- }
- return removed;
}
/**
@@ -530,23 +341,21 @@
*/
int mei_amthif_release(struct mei_device *dev, struct file *file)
{
+ struct mei_cl *cl = file->private_data;
+
if (dev->iamthif_open_count > 0)
dev->iamthif_open_count--;
- if (dev->iamthif_fp == file &&
- dev->iamthif_state != MEI_IAMTHIF_IDLE) {
+ if (cl->fp == file && dev->iamthif_state != MEI_IAMTHIF_IDLE) {
dev_dbg(dev->dev, "amthif canceled iamthif state %d\n",
dev->iamthif_state);
dev->iamthif_canceled = true;
- if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE) {
- dev_dbg(dev->dev, "run next amthif iamthif cb\n");
- mei_amthif_run_next_cmd(dev);
- }
}
- if (mei_clear_lists(dev, file))
- dev->iamthif_state = MEI_IAMTHIF_IDLE;
+ mei_clear_list(file, &dev->amthif_cmd_list.list);
+ mei_clear_list(file, &cl->rd_completed);
+ mei_clear_list(file, &dev->ctrl_rd_list.list);
return 0;
}
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 1f33fea..8cac7ef 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -126,7 +126,8 @@
goto out;
/* wait on event only if there is no other waiter */
- if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
+ /* synchronized under device mutex */
+ if (!waitqueue_active(&cl->rx_wait)) {
mutex_unlock(&bus->device_lock);
@@ -142,7 +143,7 @@
mutex_lock(&bus->device_lock);
if (!mei_cl_is_connected(cl)) {
- rets = -EBUSY;
+ rets = -ENODEV;
goto out;
}
}
@@ -234,7 +235,7 @@
/* Prepare for the next read */
if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
mutex_lock(&bus->device_lock);
- mei_cl_read_start(cldev->cl, 0, NULL);
+ mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
mutex_unlock(&bus->device_lock);
}
}
@@ -324,7 +325,7 @@
if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
mutex_lock(&bus->device_lock);
- ret = mei_cl_read_start(cldev->cl, 0, NULL);
+ ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
mutex_unlock(&bus->device_lock);
if (ret && ret != -EBUSY)
return ret;
@@ -983,12 +984,10 @@
container_of(work, struct mei_device, bus_rescan_work);
struct mei_me_client *me_cl;
- mutex_lock(&bus->device_lock);
me_cl = mei_me_cl_by_uuid(bus, &mei_amthif_guid);
if (me_cl)
mei_amthif_host_init(bus, me_cl);
mei_me_cl_put(me_cl);
- mutex_unlock(&bus->device_lock);
mei_cl_bus_rescan(bus);
}
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 641c1a5..6fe0235 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -358,8 +358,9 @@
*
* Return: mei_cl_cb pointer or NULL;
*/
-struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
- const struct file *fp)
+static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl,
+ enum mei_cb_file_ops type,
+ const struct file *fp)
{
struct mei_cl_cb *cb;
@@ -420,31 +421,6 @@
}
/**
- * mei_io_cb_alloc_buf - allocate callback buffer
- *
- * @cb: io callback structure
- * @length: size of the buffer
- *
- * Return: 0 on success
- * -EINVAL if cb is NULL
- * -ENOMEM if allocation failed
- */
-int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length)
-{
- if (!cb)
- return -EINVAL;
-
- if (length == 0)
- return 0;
-
- cb->buf.data = kmalloc(length, GFP_KERNEL);
- if (!cb->buf.data)
- return -ENOMEM;
- cb->buf.size = length;
- return 0;
-}
-
-/**
* mei_cl_alloc_cb - a convenient wrapper for allocating read cb
*
* @cl: host client
@@ -455,24 +431,59 @@
* Return: cb on success and NULL on failure
*/
struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
- enum mei_cb_file_ops type,
+ enum mei_cb_file_ops fop_type,
const struct file *fp)
{
struct mei_cl_cb *cb;
- cb = mei_io_cb_init(cl, type, fp);
+ cb = mei_io_cb_init(cl, fop_type, fp);
if (!cb)
return NULL;
- if (mei_io_cb_alloc_buf(cb, length)) {
+ if (length == 0)
+ return cb;
+
+ cb->buf.data = kmalloc(length, GFP_KERNEL);
+ if (!cb->buf.data) {
mei_io_cb_free(cb);
return NULL;
}
+ cb->buf.size = length;
return cb;
}
/**
+ * mei_cl_enqueue_ctrl_wr_cb - a convenient wrapper for allocating
+ * and enqueuing of the control commands cb
+ *
+ * @cl: host client
+ * @length: size of the buffer
+ * @type: operation type
+ * @fp: associated file pointer (might be NULL)
+ *
+ * Return: cb on success and NULL on failure
+ * Locking: called under "dev->device_lock" lock
+ */
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+ enum mei_cb_file_ops fop_type,
+ const struct file *fp)
+{
+ struct mei_cl_cb *cb;
+
+ /* for RX always allocate at least client's mtu */
+ if (length)
+ length = max_t(size_t, length, mei_cl_mtu(cl));
+
+ cb = mei_cl_alloc_cb(cl, length, fop_type, fp);
+ if (!cb)
+ return NULL;
+
+ list_add_tail(&cb->list, &cl->dev->ctrl_wr_list.list);
+ return cb;
+}
+
+/**
* mei_cl_read_cb - find this cl's callback in the read list
* for a specific file
*
@@ -754,7 +765,8 @@
mei_io_list_flush(&dev->ctrl_rd_list, cl);
mei_io_list_flush(&dev->ctrl_wr_list, cl);
mei_cl_wake_all(cl);
- cl->mei_flow_ctrl_creds = 0;
+ cl->rx_flow_ctrl_creds = 0;
+ cl->tx_flow_ctrl_creds = 0;
cl->timer_count = 0;
if (!cl->me_cl)
@@ -764,7 +776,7 @@
cl->me_cl->connect_count--;
if (cl->me_cl->connect_count == 0)
- cl->me_cl->mei_flow_ctrl_creds = 0;
+ cl->me_cl->tx_flow_ctrl_creds = 0;
mei_me_cl_put(cl->me_cl);
cl->me_cl = NULL;
@@ -814,6 +826,7 @@
list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
cl->timer_count = MEI_CONNECT_TIMEOUT;
+ mei_schedule_stall_timer(dev);
return 0;
}
@@ -867,13 +880,11 @@
cl->state = MEI_FILE_DISCONNECTING;
- cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL);
- rets = cb ? 0 : -ENOMEM;
- if (rets)
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT, NULL);
+ if (!cb) {
+ rets = -ENOMEM;
goto out;
-
- cl_dbg(dev, cl, "add disconnect cb to control write list\n");
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
if (mei_hbuf_acquire(dev)) {
rets = mei_cl_send_disconnect(cl, cb);
@@ -1001,6 +1012,7 @@
list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
cl->timer_count = MEI_CONNECT_TIMEOUT;
+ mei_schedule_stall_timer(dev);
return 0;
}
@@ -1042,14 +1054,14 @@
*
* @cl: host client
* @me_cl: me client
- * @file: pointer to file structure
+ * @fp: pointer to file structure
*
* Locking: called under "dev->device_lock" lock
*
* Return: 0 on success, <0 on failure.
*/
int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
- const struct file *file)
+ const struct file *fp)
{
struct mei_device *dev;
struct mei_cl_cb *cb;
@@ -1076,12 +1088,11 @@
goto nortpm;
}
- cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file);
- rets = cb ? 0 : -ENOMEM;
- if (rets)
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_CONNECT, fp);
+ if (!cb) {
+ rets = -ENOMEM;
goto out;
-
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
/* run hbuf acquire last so we don't have to undo */
if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) {
@@ -1159,50 +1170,42 @@
return ERR_PTR(ret);
}
-
-
/**
- * mei_cl_flow_ctrl_creds - checks flow_control credits for cl.
+ * mei_cl_tx_flow_ctrl_creds - checks flow_control credits for cl.
*
* @cl: host client
- * @fp: the file pointer associated with the pointer
*
- * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise.
+ * Return: 1 if tx_flow_ctrl_creds >0, 0 - otherwise.
*/
-static int mei_cl_flow_ctrl_creds(struct mei_cl *cl, const struct file *fp)
+static int mei_cl_tx_flow_ctrl_creds(struct mei_cl *cl)
{
- int rets;
-
if (WARN_ON(!cl || !cl->me_cl))
return -EINVAL;
- if (cl->mei_flow_ctrl_creds > 0)
+ if (cl->tx_flow_ctrl_creds > 0)
return 1;
- if (mei_cl_is_fixed_address(cl)) {
- rets = mei_cl_read_start(cl, mei_cl_mtu(cl), fp);
- if (rets && rets != -EBUSY)
- return rets;
+ if (mei_cl_is_fixed_address(cl))
return 1;
- }
if (mei_cl_is_single_recv_buf(cl)) {
- if (cl->me_cl->mei_flow_ctrl_creds > 0)
+ if (cl->me_cl->tx_flow_ctrl_creds > 0)
return 1;
}
return 0;
}
/**
- * mei_cl_flow_ctrl_reduce - reduces flow_control.
+ * mei_cl_tx_flow_ctrl_creds_reduce - reduces transmit flow control credits
+ * for a client
*
- * @cl: private data of the file object
+ * @cl: host client
*
* Return:
* 0 on success
* -EINVAL when ctrl credits are <= 0
*/
-static int mei_cl_flow_ctrl_reduce(struct mei_cl *cl)
+static int mei_cl_tx_flow_ctrl_creds_reduce(struct mei_cl *cl)
{
if (WARN_ON(!cl || !cl->me_cl))
return -EINVAL;
@@ -1211,13 +1214,13 @@
return 0;
if (mei_cl_is_single_recv_buf(cl)) {
- if (WARN_ON(cl->me_cl->mei_flow_ctrl_creds <= 0))
+ if (WARN_ON(cl->me_cl->tx_flow_ctrl_creds <= 0))
return -EINVAL;
- cl->me_cl->mei_flow_ctrl_creds--;
+ cl->me_cl->tx_flow_ctrl_creds--;
} else {
- if (WARN_ON(cl->mei_flow_ctrl_creds <= 0))
+ if (WARN_ON(cl->tx_flow_ctrl_creds <= 0))
return -EINVAL;
- cl->mei_flow_ctrl_creds--;
+ cl->tx_flow_ctrl_creds--;
}
return 0;
}
@@ -1292,7 +1295,7 @@
* mei_cl_notify_request - send notification stop/start request
*
* @cl: host client
- * @file: associate request with file
+ * @fp: associate request with file
* @request: 1 for start or 0 for stop
*
* Locking: called under "dev->device_lock" lock
@@ -1300,7 +1303,7 @@
* Return: 0 on such and error otherwise.
*/
int mei_cl_notify_request(struct mei_cl *cl,
- const struct file *file, u8 request)
+ const struct file *fp, u8 request)
{
struct mei_device *dev;
struct mei_cl_cb *cb;
@@ -1325,7 +1328,7 @@
}
fop_type = mei_cl_notify_req2fop(request);
- cb = mei_io_cb_init(cl, fop_type, file);
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, fop_type, fp);
if (!cb) {
rets = -ENOMEM;
goto out;
@@ -1336,9 +1339,7 @@
rets = -ENODEV;
goto out;
}
- list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
- } else {
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
}
mutex_unlock(&dev->device_lock);
@@ -1436,25 +1437,6 @@
}
/**
- * mei_cl_is_read_fc_cb - check if read cb is waiting for flow control
- * for given host client
- *
- * @cl: host client
- *
- * Return: true, if found at least one cb.
- */
-static bool mei_cl_is_read_fc_cb(struct mei_cl *cl)
-{
- struct mei_device *dev = cl->dev;
- struct mei_cl_cb *cb;
-
- list_for_each_entry(cb, &dev->ctrl_wr_list.list, list)
- if (cb->fop_type == MEI_FOP_READ && cb->cl == cl)
- return true;
- return false;
-}
-
-/**
* mei_cl_read_start - the start read client message function.
*
* @cl: host client
@@ -1477,26 +1459,22 @@
if (!mei_cl_is_connected(cl))
return -ENODEV;
- /* HW currently supports only one pending read */
- if (!list_empty(&cl->rd_pending) || mei_cl_is_read_fc_cb(cl))
- return -EBUSY;
-
if (!mei_me_cl_is_active(cl->me_cl)) {
cl_err(dev, cl, "no such me client\n");
return -ENOTTY;
}
- /* always allocate at least client max message */
- length = max_t(size_t, length, mei_cl_mtu(cl));
- cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp);
+ if (mei_cl_is_fixed_address(cl) || cl == &dev->iamthif_cl)
+ return 0;
+
+ /* HW currently supports only one pending read */
+ if (cl->rx_flow_ctrl_creds)
+ return -EBUSY;
+
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, fp);
if (!cb)
return -ENOMEM;
- if (mei_cl_is_fixed_address(cl)) {
- list_add_tail(&cb->list, &cl->rd_pending);
- return 0;
- }
-
rets = pm_runtime_get(dev->dev);
if (rets < 0 && rets != -EINPROGRESS) {
pm_runtime_put_noidle(dev->dev);
@@ -1504,16 +1482,15 @@
goto nortpm;
}
+ rets = 0;
if (mei_hbuf_acquire(dev)) {
rets = mei_hbm_cl_flow_control_req(dev, cl);
if (rets < 0)
goto out;
- list_add_tail(&cb->list, &cl->rd_pending);
- } else {
- rets = 0;
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ list_move_tail(&cb->list, &cl->rd_pending);
}
+ cl->rx_flow_ctrl_creds++;
out:
cl_dbg(dev, cl, "rpm: autosuspend\n");
@@ -1557,7 +1534,7 @@
first_chunk = cb->buf_idx == 0;
- rets = first_chunk ? mei_cl_flow_ctrl_creds(cl, cb->fp) : 1;
+ rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1;
if (rets < 0)
return rets;
@@ -1605,7 +1582,7 @@
cb->completed = mei_hdr.msg_complete == 1;
if (first_chunk) {
- if (mei_cl_flow_ctrl_reduce(cl))
+ if (mei_cl_tx_flow_ctrl_creds_reduce(cl))
return -EIO;
}
@@ -1663,7 +1640,7 @@
mei_hdr.msg_complete = 0;
mei_hdr.internal = cb->internal;
- rets = mei_cl_flow_ctrl_creds(cl, cb->fp);
+ rets = mei_cl_tx_flow_ctrl_creds(cl);
if (rets < 0)
goto err;
@@ -1691,7 +1668,7 @@
if (rets)
goto err;
- rets = mei_cl_flow_ctrl_reduce(cl);
+ rets = mei_cl_tx_flow_ctrl_creds_reduce(cl);
if (rets)
goto err;
@@ -1761,6 +1738,9 @@
case MEI_FOP_READ:
list_add_tail(&cb->list, &cl->rd_completed);
+ if (!mei_cl_is_fixed_address(cl) &&
+ !WARN_ON(!cl->rx_flow_ctrl_creds))
+ cl->rx_flow_ctrl_creds--;
if (!mei_cl_bus_rx_event(cl))
wake_up_interruptible(&cl->rx_wait);
break;
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 0d7a3a1..d2bfabe 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -82,11 +82,7 @@
/*
* MEI IO Functions
*/
-struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
- const struct file *fp);
void mei_io_cb_free(struct mei_cl_cb *priv_cb);
-int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length);
-
/**
* mei_io_list_init - Sets up a queue list.
@@ -118,6 +114,9 @@
struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
enum mei_cb_file_ops type,
const struct file *fp);
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+ enum mei_cb_file_ops type,
+ const struct file *fp);
int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp);
/*
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 085f3aa..dd7f15a 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -161,6 +161,7 @@
* @dev: the device structure
* @cl: client
* @hbm_cmd: host bus message command
+ * @buf: message buffer
* @len: buffer length
*
* Return: 0 on success, <0 on failure.
@@ -276,6 +277,7 @@
dev->hbm_state = MEI_HBM_STARTING;
dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ mei_schedule_stall_timer(dev);
return 0;
}
@@ -311,6 +313,7 @@
}
dev->hbm_state = MEI_HBM_ENUM_CLIENTS;
dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ mei_schedule_stall_timer(dev);
return 0;
}
@@ -339,7 +342,7 @@
me_cl->props = res->client_properties;
me_cl->client_id = res->me_addr;
- me_cl->mei_flow_ctrl_creds = 0;
+ me_cl->tx_flow_ctrl_creds = 0;
mei_me_cl_add(dev, me_cl);
@@ -561,6 +564,7 @@
}
dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+ mei_schedule_stall_timer(dev);
return 0;
}
@@ -636,23 +640,22 @@
}
/**
- * mei_hbm_add_single_flow_creds - adds single buffer credentials.
+ * mei_hbm_add_single_tx_flow_ctrl_creds - adds single buffer credentials.
*
* @dev: the device structure
- * @flow: flow control.
+ * @fctrl: flow control response bus message
*
* Return: 0 on success, < 0 otherwise
*/
-static int mei_hbm_add_single_flow_creds(struct mei_device *dev,
- struct hbm_flow_control *flow)
+static int mei_hbm_add_single_tx_flow_ctrl_creds(struct mei_device *dev,
+ struct hbm_flow_control *fctrl)
{
struct mei_me_client *me_cl;
int rets;
- me_cl = mei_me_cl_by_id(dev, flow->me_addr);
+ me_cl = mei_me_cl_by_id(dev, fctrl->me_addr);
if (!me_cl) {
- dev_err(dev->dev, "no such me client %d\n",
- flow->me_addr);
+ dev_err(dev->dev, "no such me client %d\n", fctrl->me_addr);
return -ENOENT;
}
@@ -661,9 +664,9 @@
goto out;
}
- me_cl->mei_flow_ctrl_creds++;
+ me_cl->tx_flow_ctrl_creds++;
dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n",
- flow->me_addr, me_cl->mei_flow_ctrl_creds);
+ fctrl->me_addr, me_cl->tx_flow_ctrl_creds);
rets = 0;
out:
@@ -675,24 +678,24 @@
* mei_hbm_cl_flow_control_res - flow control response from me
*
* @dev: the device structure
- * @flow_control: flow control response bus message
+ * @fctrl: flow control response bus message
*/
-static void mei_hbm_cl_flow_control_res(struct mei_device *dev,
- struct hbm_flow_control *flow_control)
+static void mei_hbm_cl_tx_flow_ctrl_creds_res(struct mei_device *dev,
+ struct hbm_flow_control *fctrl)
{
struct mei_cl *cl;
- if (!flow_control->host_addr) {
+ if (!fctrl->host_addr) {
/* single receive buffer */
- mei_hbm_add_single_flow_creds(dev, flow_control);
+ mei_hbm_add_single_tx_flow_ctrl_creds(dev, fctrl);
return;
}
- cl = mei_hbm_cl_find_by_cmd(dev, flow_control);
+ cl = mei_hbm_cl_find_by_cmd(dev, fctrl);
if (cl) {
- cl->mei_flow_ctrl_creds++;
+ cl->tx_flow_ctrl_creds++;
cl_dbg(dev, cl, "flow control creds = %d.\n",
- cl->mei_flow_ctrl_creds);
+ cl->tx_flow_ctrl_creds);
}
}
@@ -871,10 +874,10 @@
cl->state = MEI_FILE_DISCONNECTING;
cl->timer_count = 0;
- cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL);
+ cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT_RSP,
+ NULL);
if (!cb)
return -ENOMEM;
- list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
}
return 0;
}
@@ -1022,7 +1025,7 @@
struct mei_hbm_cl_cmd *cl_cmd;
struct hbm_client_connect_request *disconnect_req;
- struct hbm_flow_control *flow_control;
+ struct hbm_flow_control *fctrl;
/* read the message to our buffer */
BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
@@ -1102,8 +1105,8 @@
case MEI_FLOW_CONTROL_CMD:
dev_dbg(dev->dev, "hbm: client flow control response: message received.\n");
- flow_control = (struct hbm_flow_control *) mei_msg;
- mei_hbm_cl_flow_control_res(dev, flow_control);
+ fctrl = (struct hbm_flow_control *)mei_msg;
+ mei_hbm_cl_tx_flow_ctrl_creds_res(dev, fctrl);
break;
case MEI_PG_ISOLATION_ENTRY_RES_CMD:
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 0dcb854..7ad15d6 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -125,6 +125,9 @@
#define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */
#define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */
+#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */
+#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */
+
/*
* MEI HW Section
*/
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index dc3a854..56c2101 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -18,6 +18,7 @@
#include <linux/kthread.h>
#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
#include "mei_dev.h"
#include "hbm.h"
@@ -1063,6 +1064,8 @@
}
}
+ pm_runtime_set_active(dev->dev);
+
hcsr = mei_hcsr_read(dev);
/* H_RST may be found lit before reset is started,
* for example if preceding reset flow hasn't completed.
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
index 4a6c1b8..e6e5e55 100644
--- a/drivers/misc/mei/hw-txe.c
+++ b/drivers/misc/mei/hw-txe.c
@@ -20,6 +20,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/irqreturn.h>
+#include <linux/pm_runtime.h>
#include <linux/mei.h>
@@ -935,6 +936,8 @@
return ret;
}
+ pm_runtime_set_active(dev->dev);
+
/* enable input ready interrupts:
* SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK
*/
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index f7c8dfd..9a9c248 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -94,7 +94,7 @@
cancel_work_sync(&dev->reset_work);
cancel_work_sync(&dev->bus_rescan_work);
- cancel_delayed_work(&dev->timer_work);
+ cancel_delayed_work_sync(&dev->timer_work);
}
EXPORT_SYMBOL_GPL(mei_cancel_work);
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 3831a7b..5a4893c 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -102,26 +102,25 @@
{
struct mei_device *dev = cl->dev;
struct mei_cl_cb *cb;
- unsigned char *buffer = NULL;
size_t buf_sz;
cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list);
if (!cb) {
- cl_err(dev, cl, "pending read cb not found\n");
- goto out;
+ if (!mei_cl_is_fixed_address(cl)) {
+ cl_err(dev, cl, "pending read cb not found\n");
+ goto discard;
+ }
+ cb = mei_cl_alloc_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, cl->fp);
+ if (!cb)
+ goto discard;
+ list_add_tail(&cb->list, &cl->rd_pending);
}
if (!mei_cl_is_connected(cl)) {
cl_dbg(dev, cl, "not connected\n");
- cb->status = -ENODEV;
- goto out;
- }
-
- if (cb->buf.size == 0 || cb->buf.data == NULL) {
- cl_err(dev, cl, "response buffer is not allocated.\n");
list_move_tail(&cb->list, &complete_list->list);
- cb->status = -ENOMEM;
- goto out;
+ cb->status = -ENODEV;
+ goto discard;
}
buf_sz = mei_hdr->length + cb->buf_idx;
@@ -132,25 +131,19 @@
list_move_tail(&cb->list, &complete_list->list);
cb->status = -EMSGSIZE;
- goto out;
+ goto discard;
}
if (cb->buf.size < buf_sz) {
cl_dbg(dev, cl, "message overflow. size %zu len %d idx %zu\n",
cb->buf.size, mei_hdr->length, cb->buf_idx);
- buffer = krealloc(cb->buf.data, buf_sz, GFP_KERNEL);
- if (!buffer) {
- cb->status = -ENOMEM;
- list_move_tail(&cb->list, &complete_list->list);
- goto out;
- }
- cb->buf.data = buffer;
- cb->buf.size = buf_sz;
+ list_move_tail(&cb->list, &complete_list->list);
+ cb->status = -EMSGSIZE;
+ goto discard;
}
- buffer = cb->buf.data + cb->buf_idx;
- mei_read_slots(dev, buffer, mei_hdr->length);
+ mei_read_slots(dev, cb->buf.data + cb->buf_idx, mei_hdr->length);
cb->buf_idx += mei_hdr->length;
@@ -162,10 +155,10 @@
pm_request_autosuspend(dev->dev);
}
-out:
- if (!buffer)
- mei_irq_discard_msg(dev, mei_hdr);
+ return 0;
+discard:
+ mei_irq_discard_msg(dev, mei_hdr);
return 0;
}
@@ -216,6 +209,9 @@
int slots;
int ret;
+ if (!list_empty(&cl->rd_pending))
+ return 0;
+
msg_slots = mei_data2slots(sizeof(struct hbm_flow_control));
slots = mei_hbuf_empty_slots(dev);
@@ -463,6 +459,19 @@
mei_reset(dev);
}
+#define MEI_STALL_TIMER_FREQ (2 * HZ)
+/**
+ * mei_schedule_stall_timer - re-arm stall_timer work
+ *
+ * Schedule stall timer
+ *
+ * @dev: the device structure
+ */
+void mei_schedule_stall_timer(struct mei_device *dev)
+{
+ schedule_delayed_work(&dev->timer_work, MEI_STALL_TIMER_FREQ);
+}
+
/**
* mei_timer - timer function.
*
@@ -472,10 +481,9 @@
void mei_timer(struct work_struct *work)
{
struct mei_cl *cl;
-
struct mei_device *dev = container_of(work,
struct mei_device, timer_work.work);
-
+ bool reschedule_timer = false;
mutex_lock(&dev->device_lock);
@@ -490,6 +498,7 @@
mei_reset(dev);
goto out;
}
+ reschedule_timer = true;
}
}
@@ -504,6 +513,7 @@
mei_connect_timeout(cl);
goto out;
}
+ reschedule_timer = true;
}
}
@@ -514,19 +524,16 @@
if (--dev->iamthif_stall_timer == 0) {
dev_err(dev->dev, "timer: amthif hanged.\n");
mei_reset(dev);
- dev->iamthif_canceled = false;
- dev->iamthif_state = MEI_IAMTHIF_IDLE;
- mei_io_cb_free(dev->iamthif_current_cb);
- dev->iamthif_current_cb = NULL;
-
- dev->iamthif_fp = NULL;
mei_amthif_run_next_cmd(dev);
+ goto out;
}
+ reschedule_timer = true;
}
out:
- if (dev->dev_state != MEI_DEV_DISABLED)
- schedule_delayed_work(&dev->timer_work, 2 * HZ);
+ if (dev->dev_state != MEI_DEV_DISABLED && reschedule_timer)
+ mei_schedule_stall_timer(dev);
+
mutex_unlock(&dev->device_lock);
}
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 52635b0..fa50635 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -71,6 +71,7 @@
goto err_unlock;
}
+ cl->fp = file;
file->private_data = cl;
mutex_unlock(&dev->device_lock);
@@ -138,9 +139,8 @@
struct mei_cl *cl = file->private_data;
struct mei_device *dev;
struct mei_cl_cb *cb = NULL;
+ bool nonblock = !!(file->f_flags & O_NONBLOCK);
int rets;
- int err;
-
if (WARN_ON(!cl || !cl->dev))
return -ENODEV;
@@ -164,11 +164,6 @@
goto out;
}
- if (cl == &dev->iamthif_cl) {
- rets = mei_amthif_read(dev, file, ubuf, length, offset);
- goto out;
- }
-
cb = mei_cl_read_cb(cl, file);
if (cb)
goto copy_buffer;
@@ -176,24 +171,29 @@
if (*offset > 0)
*offset = 0;
- err = mei_cl_read_start(cl, length, file);
- if (err && err != -EBUSY) {
- cl_dbg(dev, cl, "mei start read failure status = %d\n", err);
- rets = err;
+ rets = mei_cl_read_start(cl, length, file);
+ if (rets && rets != -EBUSY) {
+ cl_dbg(dev, cl, "mei start read failure status = %d\n", rets);
goto out;
}
- if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
- if (file->f_flags & O_NONBLOCK) {
- rets = -EAGAIN;
- goto out;
- }
+ if (nonblock) {
+ rets = -EAGAIN;
+ goto out;
+ }
+ if (rets == -EBUSY &&
+ !mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, file)) {
+ rets = -ENOMEM;
+ goto out;
+ }
+
+ do {
mutex_unlock(&dev->device_lock);
if (wait_event_interruptible(cl->rx_wait,
- (!list_empty(&cl->rd_completed)) ||
- (!mei_cl_is_connected(cl)))) {
+ (!list_empty(&cl->rd_completed)) ||
+ (!mei_cl_is_connected(cl)))) {
if (signal_pending(current))
return -EINTR;
@@ -202,16 +202,12 @@
mutex_lock(&dev->device_lock);
if (!mei_cl_is_connected(cl)) {
- rets = -EBUSY;
+ rets = -ENODEV;
goto out;
}
- }
- cb = mei_cl_read_cb(cl, file);
- if (!cb) {
- rets = 0;
- goto out;
- }
+ cb = mei_cl_read_cb(cl, file);
+ } while (!cb);
copy_buffer:
/* now copy the data to user space */
@@ -609,24 +605,24 @@
goto out;
}
- if (cl == &dev->iamthif_cl) {
- mask = mei_amthif_poll(dev, file, wait);
- goto out;
- }
-
if (notify_en) {
poll_wait(file, &cl->ev_wait, wait);
if (cl->notify_ev)
mask |= POLLPRI;
}
+ if (cl == &dev->iamthif_cl) {
+ mask |= mei_amthif_poll(file, wait);
+ goto out;
+ }
+
if (req_events & (POLLIN | POLLRDNORM)) {
poll_wait(file, &cl->rx_wait, wait);
if (!list_empty(&cl->rd_completed))
mask |= POLLIN | POLLRDNORM;
else
- mei_cl_read_start(cl, 0, file);
+ mei_cl_read_start(cl, mei_cl_mtu(cl), file);
}
out:
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index e5e3250..1169fd9 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -80,18 +80,13 @@
enum iamthif_states {
MEI_IAMTHIF_IDLE,
MEI_IAMTHIF_WRITING,
- MEI_IAMTHIF_FLOW_CONTROL,
MEI_IAMTHIF_READING,
- MEI_IAMTHIF_READ_COMPLETE
};
enum mei_file_transaction_states {
MEI_IDLE,
MEI_WRITING,
MEI_WRITE_COMPLETE,
- MEI_FLOW_CONTROL,
- MEI_READING,
- MEI_READ_COMPLETE
};
/**
@@ -146,7 +141,7 @@
* @refcnt: struct reference count
* @props: client properties
* @client_id: me client id
- * @mei_flow_ctrl_creds: flow control credits
+ * @tx_flow_ctrl_creds: flow control credits
* @connect_count: number connections to this client
* @bus_added: added to bus
*/
@@ -155,7 +150,7 @@
struct kref refcnt;
struct mei_client_properties props;
u8 client_id;
- u8 mei_flow_ctrl_creds;
+ u8 tx_flow_ctrl_creds;
u8 connect_count;
u8 bus_added;
};
@@ -202,10 +197,11 @@
* @ev_async: event async notification
* @status: connection status
* @me_cl: fw client connected
+ * @fp: file associated with client
* @host_client_id: host id
- * @mei_flow_ctrl_creds: transmit flow credentials
+ * @tx_flow_ctrl_creds: transmit flow credentials
+ * @rx_flow_ctrl_creds: receive flow credentials
* @timer_count: watchdog timer for operation completion
- * @reserved: reserved for alignment
* @notify_en: notification - enabled/disabled
* @notify_ev: pending notification event
* @writing_state: state of the tx
@@ -225,10 +221,11 @@
struct fasync_struct *ev_async;
int status;
struct mei_me_client *me_cl;
+ const struct file *fp;
u8 host_client_id;
- u8 mei_flow_ctrl_creds;
+ u8 tx_flow_ctrl_creds;
+ u8 rx_flow_ctrl_creds;
u8 timer_count;
- u8 reserved;
u8 notify_en;
u8 notify_ev;
enum mei_file_transaction_states writing_state;
@@ -400,9 +397,7 @@
* @override_fixed_address: force allow fixed address behavior
*
* @amthif_cmd_list : amthif list for cmd waiting
- * @iamthif_fp : file for current amthif operation
* @iamthif_cl : amthif host client
- * @iamthif_current_cb : amthif current operation callback
* @iamthif_open_count : number of opened amthif connections
* @iamthif_stall_timer : timer to detect amthif hang
* @iamthif_state : amthif processor state
@@ -484,10 +479,7 @@
/* amthif list for cmd waiting */
struct mei_cl_cb amthif_cmd_list;
- /* driver managed amthif list for reading completed amthif cmd data */
- const struct file *iamthif_fp;
struct mei_cl iamthif_cl;
- struct mei_cl_cb *iamthif_current_cb;
long iamthif_open_count;
u32 iamthif_stall_timer;
enum iamthif_states iamthif_state;
@@ -556,6 +548,7 @@
*/
void mei_timer(struct work_struct *work);
+void mei_schedule_stall_timer(struct mei_device *dev);
int mei_irq_read_handler(struct mei_device *dev,
struct mei_cl_cb *cmpl_list, s32 *slots);
@@ -569,11 +562,7 @@
int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
-int mei_amthif_read(struct mei_device *dev, struct file *file,
- char __user *ubuf, size_t length, loff_t *offset);
-
-unsigned int mei_amthif_poll(struct mei_device *dev,
- struct file *file, poll_table *wait);
+unsigned int mei_amthif_poll(struct file *file, poll_table *wait);
int mei_amthif_release(struct mei_device *dev, struct file *file);
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 71cea9b..f3ffd88 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -91,6 +91,9 @@
{MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)},
{MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, mei_me_pch8_cfg)},
+
/* required last entry */
{0, }
};
@@ -217,8 +220,6 @@
pci_set_drvdata(pdev, dev);
- schedule_delayed_work(&dev->timer_work, HZ);
-
/*
* For not wake-able HW runtime pm framework
* can't be used on pci device level.
@@ -400,6 +401,9 @@
dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret);
+ if (ret && ret != -EAGAIN)
+ schedule_work(&dev->reset_work);
+
return ret;
}
@@ -423,6 +427,9 @@
dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret);
+ if (ret)
+ schedule_work(&dev->reset_work);
+
return ret;
}
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
index 30cc306..58ffd30 100644
--- a/drivers/misc/mei/pci-txe.c
+++ b/drivers/misc/mei/pci-txe.c
@@ -347,6 +347,10 @@
dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret);
mutex_unlock(&dev->device_lock);
+
+ if (ret && ret != -EAGAIN)
+ schedule_work(&dev->reset_work);
+
return ret;
}
@@ -372,6 +376,9 @@
dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret);
+ if (ret)
+ schedule_work(&dev->reset_work);
+
return ret;
}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index cd01a0e..64d5760 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -115,7 +115,6 @@
*/
static
void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
- struct scif_endpt *ep,
u64 start, u64 len)
{
struct list_head *item, *tmp;
@@ -128,7 +127,6 @@
list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
window = list_entry(item, struct scif_window, list);
- ep = (struct scif_endpt *)window->ep;
if (!len)
break;
start_va = window->va_for_temp;
@@ -146,7 +144,7 @@
struct scif_endpt *ep = mmn->ep;
spin_lock(&ep->rma_info.tc_lock);
- __scif_rma_destroy_tcw(mmn, ep, start, len);
+ __scif_rma_destroy_tcw(mmn, start, len);
spin_unlock(&ep->rma_info.tc_lock);
}
@@ -169,7 +167,7 @@
spin_lock(&ep->rma_info.tc_lock);
list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
mmn = list_entry(item, struct scif_mmu_notif, list);
- __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+ __scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
}
spin_unlock(&ep->rma_info.tc_lock);
}
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
index 49cb8f7..9282116 100644
--- a/drivers/misc/mic/scif/scif_mmap.c
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -552,7 +552,7 @@
{
struct scif_endpt *ep;
struct vma_pvt *vmapvt = vma->vm_private_data;
- int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int nr_pages = vma_pages(vma);
s64 offset;
struct scif_rma_req req;
struct scif_window *window = NULL;
@@ -614,7 +614,7 @@
struct scif_window *window = NULL;
struct scif_endpt *ep = (struct scif_endpt *)epd;
s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
- int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int nr_pages = vma_pages(vma);
int err;
struct vma_pvt *vmapvt;
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c
index 4810e03..e42bdc9 100644
--- a/drivers/misc/pch_phub.c
+++ b/drivers/misc/pch_phub.c
@@ -28,6 +28,7 @@
#include <linux/if_ether.h>
#include <linux/ctype.h>
#include <linux/dmi.h>
+#include <linux/of.h>
#define PHUB_STATUS 0x00 /* Status Register offset */
#define PHUB_CONTROL 0x04 /* Control Register offset */
@@ -57,6 +58,7 @@
/* CM-iTC */
#define CLKCFG_UART_48MHZ (1 << 16)
+#define CLKCFG_UART_25MHZ (2 << 16)
#define CLKCFG_BAUDDIV (2 << 20)
#define CLKCFG_PLL2VCO (8 << 9)
#define CLKCFG_UARTCLKSEL (1 << 18)
@@ -711,6 +713,12 @@
if (id->driver_data == 1) { /* EG20T PCH */
const char *board_name;
+ unsigned int prefetch = 0x000affaa;
+
+ if (pdev->dev.of_node)
+ of_property_read_u32(pdev->dev.of_node,
+ "intel,eg20t-prefetch",
+ &prefetch);
ret = sysfs_create_file(&pdev->dev.kobj,
&dev_attr_pch_mac.attr);
@@ -736,11 +744,21 @@
CLKCFG_UART_MASK);
/* set the prefech value */
- iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+ iowrite32(prefetch, chip->pch_phub_base_address + 0x14);
/* set the interrupt delay value */
iowrite32(0x25, chip->pch_phub_base_address + 0x44);
chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+
+ /* quirk for MIPS Boston platform */
+ if (pdev->dev.of_node) {
+ if (of_machine_is_compatible("img,boston")) {
+ pch_phub_read_modify_write_reg(chip,
+ (unsigned int)CLKCFG_REG_OFFSET,
+ CLKCFG_UART_25MHZ,
+ CLKCFG_UART_MASK);
+ }
+ }
} else if (id->driver_data == 2) { /* ML7213 IOH */
ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
if (ret)
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 9ec262a..ec09010 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -381,18 +381,12 @@
return -EINVAL;
}
- dg = kmalloc(send_info.len, GFP_KERNEL);
- if (!dg) {
+ dg = memdup_user((void __user *)(uintptr_t)send_info.addr,
+ send_info.len);
+ if (IS_ERR(dg)) {
vmci_ioctl_err(
"cannot allocate memory to dispatch datagram\n");
- return -ENOMEM;
- }
-
- if (copy_from_user(dg, (void __user *)(uintptr_t)send_info.addr,
- send_info.len)) {
- vmci_ioctl_err("error getting datagram\n");
- kfree(dg);
- return -EFAULT;
+ return PTR_ERR(dg);
}
if (VMCI_DG_SIZE(dg) != send_info.len) {
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 32380d5..767af20 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1112,11 +1112,12 @@
div = (host->bus_hz != clock) ? DIV_ROUND_UP(div, 2) : 0;
- dev_info(&slot->mmc->class_dev,
- "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n",
- slot->id, host->bus_hz, clock,
- div ? ((host->bus_hz / div) >> 1) :
- host->bus_hz, div);
+ if (clock != slot->__clk_old || force_clkinit)
+ dev_info(&slot->mmc->class_dev,
+ "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n",
+ slot->id, host->bus_hz, clock,
+ div ? ((host->bus_hz / div) >> 1) :
+ host->bus_hz, div);
/* disable clock */
mci_writel(host, CLKENA, 0);
@@ -1139,6 +1140,9 @@
/* inform CIU */
mci_send_cmd(slot, sdmmc_cmd_bits, 0);
+
+ /* keep the last clock value that was requested from core */
+ slot->__clk_old = clock;
}
host->current_speed = clock;
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 9e740bc..e8cd2de 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -249,6 +249,8 @@
* @queue_node: List node for placing this node in the @queue list of
* &struct dw_mci.
* @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @__clk_old: The last clock value that was requested from core.
+ * Keeping track of this helps us to avoid spamming the console.
* @flags: Random state bits associated with the slot.
* @id: Number of this slot.
* @sdio_id: Number of this slot in the SDIO interrupt registers.
@@ -263,6 +265,7 @@
struct list_head queue_node;
unsigned int clock;
+ unsigned int __clk_old;
unsigned long flags;
#define DW_MMC_CARD_PRESENT 0
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index cc07ba0..27fa8b8 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -240,6 +240,9 @@
unsigned long flags;
u32 val;
+ /* Reset ECC hardware */
+ davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
+
spin_lock_irqsave(&davinci_nand_lock, flags);
/* Start 4-bit ECC calculation for read/write */
diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index 25a4fbd..d54f666 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -366,7 +366,8 @@
u8 *data, u32 bytes)
{
dma_addr_t addr;
- u32 *p, len, i;
+ u8 *p;
+ u32 len, i, val;
int ret = 0;
addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE);
@@ -392,11 +393,14 @@
/* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */
len = (config->strength * ECC_PARITY_BITS + 7) >> 3;
- p = (u32 *)(data + bytes);
+ p = data + bytes;
/* write the parity bytes generated by the ECC back to the OOB region */
- for (i = 0; i < len; i++)
- p[i] = readl(ecc->regs + ECC_ENCPAR(i));
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = readl(ecc->regs + ECC_ENCPAR(i / 4));
+ p[i] = (val >> ((i % 4) * 8)) & 0xff;
+ }
timeout:
dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE);
diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index ddaa2ac..5223a21 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -93,6 +93,9 @@
#define NFI_FSM_MASK (0xf << 16)
#define NFI_ADDRCNTR (0x70)
#define CNTR_MASK GENMASK(16, 12)
+#define ADDRCNTR_SEC_SHIFT (12)
+#define ADDRCNTR_SEC(val) \
+ (((val) & CNTR_MASK) >> ADDRCNTR_SEC_SHIFT)
#define NFI_STRADDR (0x80)
#define NFI_BYTELEN (0x84)
#define NFI_CSEL (0x90)
@@ -699,7 +702,7 @@
}
ret = readl_poll_timeout_atomic(nfc->regs + NFI_ADDRCNTR, reg,
- (reg & CNTR_MASK) >= chip->ecc.steps,
+ ADDRCNTR_SEC(reg) >= chip->ecc.steps,
10, MTK_TIMEOUT);
if (ret)
dev_err(dev, "hwecc write timeout\n");
@@ -902,7 +905,7 @@
dev_warn(nfc->dev, "read ahb/dma done timeout\n");
rc = readl_poll_timeout_atomic(nfc->regs + NFI_BYTELEN, reg,
- (reg & CNTR_MASK) >= sectors, 10,
+ ADDRCNTR_SEC(reg) >= sectors, 10,
MTK_TIMEOUT);
if (rc < 0) {
dev_err(nfc->dev, "subpage done timeout\n");
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 5173fad..57cbe2b 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -943,7 +943,7 @@
struct nand_chip *nand_chip = mtd_to_nand(mtd);
int stepsize = nand_chip->ecc.bytes == 9 ? 16 : 26;
- if (section > nand_chip->ecc.steps)
+ if (section >= nand_chip->ecc.steps)
return -ERANGE;
if (!section) {
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index a59361c..5513bfd9 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -2169,7 +2169,7 @@
return 0;
return_error:
- if (info->dma)
+ if (!IS_ERR_OR_NULL(info->dma))
dma_release_channel(info->dma);
if (nand_chip->ecc.priv) {
nand_bch_free(nand_chip->ecc.priv);
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index e21f7cc..8d6208c 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
+#include <linux/workqueue.h>
#include <linux/can.h>
#include <linux/can/dev.h>
#include <linux/can/skb.h>
@@ -501,9 +502,8 @@
/*
* CAN device restart for bus-off recovery
*/
-static void can_restart(unsigned long data)
+static void can_restart(struct net_device *dev)
{
- struct net_device *dev = (struct net_device *)data;
struct can_priv *priv = netdev_priv(dev);
struct net_device_stats *stats = &dev->stats;
struct sk_buff *skb;
@@ -543,6 +543,14 @@
netdev_err(dev, "Error %d during restart", err);
}
+static void can_restart_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct can_priv *priv = container_of(dwork, struct can_priv, restart_work);
+
+ can_restart(priv->dev);
+}
+
int can_restart_now(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
@@ -556,8 +564,8 @@
if (priv->state != CAN_STATE_BUS_OFF)
return -EBUSY;
- /* Runs as soon as possible in the timer context */
- mod_timer(&priv->restart_timer, jiffies);
+ cancel_delayed_work_sync(&priv->restart_work);
+ can_restart(dev);
return 0;
}
@@ -578,8 +586,8 @@
netif_carrier_off(dev);
if (priv->restart_ms)
- mod_timer(&priv->restart_timer,
- jiffies + (priv->restart_ms * HZ) / 1000);
+ schedule_delayed_work(&priv->restart_work,
+ msecs_to_jiffies(priv->restart_ms));
}
EXPORT_SYMBOL_GPL(can_bus_off);
@@ -688,6 +696,7 @@
return NULL;
priv = netdev_priv(dev);
+ priv->dev = dev;
if (echo_skb_max) {
priv->echo_skb_max = echo_skb_max;
@@ -697,7 +706,7 @@
priv->state = CAN_STATE_STOPPED;
- init_timer(&priv->restart_timer);
+ INIT_DELAYED_WORK(&priv->restart_work, can_restart_work);
return dev;
}
@@ -778,8 +787,6 @@
if (!netif_carrier_ok(dev))
netif_carrier_on(dev);
- setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev);
-
return 0;
}
EXPORT_SYMBOL_GPL(open_candev);
@@ -794,7 +801,7 @@
{
struct can_priv *priv = netdev_priv(dev);
- del_timer_sync(&priv->restart_timer);
+ cancel_delayed_work_sync(&priv->restart_work);
can_flush_echo_skb(dev);
}
EXPORT_SYMBOL_GPL(close_candev);
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 41c0fc9..16f7cad 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1268,11 +1268,10 @@
struct flexcan_priv *priv = netdev_priv(dev);
int err;
- err = flexcan_chip_disable(priv);
- if (err)
- return err;
-
if (netif_running(dev)) {
+ err = flexcan_chip_disable(priv);
+ if (err)
+ return err;
netif_stop_queue(dev);
netif_device_detach(dev);
}
@@ -1285,13 +1284,17 @@
{
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
+ int err;
priv->can.state = CAN_STATE_ERROR_ACTIVE;
if (netif_running(dev)) {
netif_device_attach(dev);
netif_start_queue(dev);
+ err = flexcan_chip_enable(priv);
+ if (err)
+ return err;
}
- return flexcan_chip_enable(priv);
+ return 0;
}
static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume);
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 2d1d22e..368bb07 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -81,6 +81,10 @@
#define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15)
#define IFI_CANFD_TDELAY 0x1c
+#define IFI_CANFD_TDELAY_DEFAULT 0xb
+#define IFI_CANFD_TDELAY_MASK 0x3fff
+#define IFI_CANFD_TDELAY_ABS BIT(14)
+#define IFI_CANFD_TDELAY_EN BIT(15)
#define IFI_CANFD_ERROR 0x20
#define IFI_CANFD_ERROR_TX_OFFSET 0
@@ -641,7 +645,7 @@
struct ifi_canfd_priv *priv = netdev_priv(ndev);
const struct can_bittiming *bt = &priv->can.bittiming;
const struct can_bittiming *dbt = &priv->can.data_bittiming;
- u16 brp, sjw, tseg1, tseg2;
+ u16 brp, sjw, tseg1, tseg2, tdc;
/* Configure bit timing */
brp = bt->brp - 2;
@@ -664,6 +668,11 @@
(brp << IFI_CANFD_TIME_PRESCALE_OFF) |
(sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8),
priv->base + IFI_CANFD_FTIME);
+
+ /* Configure transmitter delay */
+ tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK;
+ writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc,
+ priv->base + IFI_CANFD_TDELAY);
}
static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 8fc3f3c..505ceaf 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -6356,10 +6356,6 @@
struct bnx2 *bp = netdev_priv(dev);
int rc;
- rc = bnx2_request_firmware(bp);
- if (rc < 0)
- goto out;
-
netif_carrier_off(dev);
bnx2_disable_int(bp);
@@ -6428,7 +6424,6 @@
bnx2_free_irq(bp);
bnx2_free_mem(bp);
bnx2_del_napi(bp);
- bnx2_release_firmware(bp);
goto out;
}
@@ -8575,6 +8570,12 @@
pci_set_drvdata(pdev, dev);
+ rc = bnx2_request_firmware(bp);
+ if (rc < 0)
+ goto error;
+
+
+ bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -8607,6 +8608,7 @@
return 0;
error:
+ bnx2_release_firmware(bp);
pci_iounmap(pdev, bp->regview);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 8d4f849..5414563 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -453,25 +453,29 @@
static int bcmgenet_get_settings(struct net_device *dev,
struct ethtool_cmd *cmd)
{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
if (!netif_running(dev))
return -EINVAL;
- if (!dev->phydev)
+ if (!priv->phydev)
return -ENODEV;
- return phy_ethtool_gset(dev->phydev, cmd);
+ return phy_ethtool_gset(priv->phydev, cmd);
}
static int bcmgenet_set_settings(struct net_device *dev,
struct ethtool_cmd *cmd)
{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
if (!netif_running(dev))
return -EINVAL;
- if (!dev->phydev)
+ if (!priv->phydev)
return -ENODEV;
- return phy_ethtool_sset(dev->phydev, cmd);
+ return phy_ethtool_sset(priv->phydev, cmd);
}
static int bcmgenet_set_rx_csum(struct net_device *dev,
@@ -937,7 +941,7 @@
e->eee_active = p->eee_active;
e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
- return phy_ethtool_get_eee(dev->phydev, e);
+ return phy_ethtool_get_eee(priv->phydev, e);
}
static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -954,7 +958,7 @@
if (!p->eee_enabled) {
bcmgenet_eee_enable_set(dev, false);
} else {
- ret = phy_init_eee(dev->phydev, 0);
+ ret = phy_init_eee(priv->phydev, 0);
if (ret) {
netif_err(priv, hw, dev, "EEE initialization failed\n");
return ret;
@@ -964,12 +968,14 @@
bcmgenet_eee_enable_set(dev, true);
}
- return phy_ethtool_set_eee(dev->phydev, e);
+ return phy_ethtool_set_eee(priv->phydev, e);
}
static int bcmgenet_nway_reset(struct net_device *dev)
{
- return genphy_restart_aneg(dev->phydev);
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ return genphy_restart_aneg(priv->phydev);
}
/* standard ethtool support functions. */
@@ -996,13 +1002,12 @@
static int bcmgenet_power_down(struct bcmgenet_priv *priv,
enum bcmgenet_power_mode mode)
{
- struct net_device *ndev = priv->dev;
int ret = 0;
u32 reg;
switch (mode) {
case GENET_POWER_CABLE_SENSE:
- phy_detach(ndev->phydev);
+ phy_detach(priv->phydev);
break;
case GENET_POWER_WOL_MAGIC:
@@ -1063,6 +1068,7 @@
/* ioctl handle special commands that are not present in ethtool. */
static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
int val = 0;
if (!netif_running(dev))
@@ -1072,10 +1078,10 @@
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- if (!dev->phydev)
+ if (!priv->phydev)
val = -ENODEV;
else
- val = phy_mii_ioctl(dev->phydev, rq, cmd);
+ val = phy_mii_ioctl(priv->phydev, rq, cmd);
break;
default:
@@ -2458,7 +2464,6 @@
{
struct bcmgenet_priv *priv = container_of(
work, struct bcmgenet_priv, bcmgenet_irq_work);
- struct net_device *ndev = priv->dev;
netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
@@ -2471,7 +2476,7 @@
/* Link UP/DOWN event */
if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
- phy_mac_interrupt(ndev->phydev,
+ phy_mac_interrupt(priv->phydev,
!!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
}
@@ -2833,7 +2838,7 @@
/* Monitor link interrupts now */
bcmgenet_link_intr_enable(priv);
- phy_start(dev->phydev);
+ phy_start(priv->phydev);
}
static int bcmgenet_open(struct net_device *dev)
@@ -2932,7 +2937,7 @@
struct bcmgenet_priv *priv = netdev_priv(dev);
netif_tx_stop_all_queues(dev);
- phy_stop(dev->phydev);
+ phy_stop(priv->phydev);
bcmgenet_intr_disable(priv);
bcmgenet_disable_rx_napi(priv);
bcmgenet_disable_tx_napi(priv);
@@ -2958,7 +2963,7 @@
bcmgenet_netif_stop(dev);
/* Really kill the PHY state machine and disconnect from it */
- phy_disconnect(dev->phydev);
+ phy_disconnect(priv->phydev);
/* Disable MAC receive */
umac_enable_set(priv, CMD_RX_EN, false);
@@ -3517,7 +3522,7 @@
bcmgenet_netif_stop(dev);
- phy_suspend(dev->phydev);
+ phy_suspend(priv->phydev);
netif_device_detach(dev);
@@ -3581,7 +3586,7 @@
if (priv->wolopts)
clk_disable_unprepare(priv->clk_wol);
- phy_init_hw(dev->phydev);
+ phy_init_hw(priv->phydev);
/* Speed settings must be restored */
bcmgenet_mii_config(priv->dev);
@@ -3614,7 +3619,7 @@
netif_device_attach(dev);
- phy_resume(dev->phydev);
+ phy_resume(priv->phydev);
if (priv->eee.eee_enabled)
bcmgenet_eee_enable_set(dev, true);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 0f0868c..1e2dc34 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -597,6 +597,7 @@
/* MDIO bus variables */
wait_queue_head_t wq;
+ struct phy_device *phydev;
bool internal_phy;
struct device_node *phy_dn;
struct device_node *mdio_dn;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e907acd..457c3bc 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -86,7 +86,7 @@
void bcmgenet_mii_setup(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
- struct phy_device *phydev = dev->phydev;
+ struct phy_device *phydev = priv->phydev;
u32 reg, cmd_bits = 0;
bool status_changed = false;
@@ -183,9 +183,9 @@
if (GENET_IS_V4(priv))
return;
- if (dev->phydev) {
- phy_init_hw(dev->phydev);
- phy_start_aneg(dev->phydev);
+ if (priv->phydev) {
+ phy_init_hw(priv->phydev);
+ phy_start_aneg(priv->phydev);
}
}
@@ -236,7 +236,6 @@
static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
{
- struct net_device *ndev = priv->dev;
u32 reg;
/* Speed settings are set in bcmgenet_mii_setup() */
@@ -245,14 +244,14 @@
bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
- fixed_phy_set_link_update(ndev->phydev,
+ fixed_phy_set_link_update(priv->phydev,
bcmgenet_fixed_phy_link_update);
}
int bcmgenet_mii_config(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
- struct phy_device *phydev = dev->phydev;
+ struct phy_device *phydev = priv->phydev;
struct device *kdev = &priv->pdev->dev;
const char *phy_name = NULL;
u32 id_mode_dis = 0;
@@ -303,7 +302,7 @@
* capabilities, use that knowledge to also configure the
* Reverse MII interface correctly.
*/
- if ((phydev->supported & PHY_BASIC_FEATURES) ==
+ if ((priv->phydev->supported & PHY_BASIC_FEATURES) ==
PHY_BASIC_FEATURES)
port_ctrl = PORT_MODE_EXT_RVMII_25;
else
@@ -372,7 +371,7 @@
return -ENODEV;
}
} else {
- phydev = dev->phydev;
+ phydev = priv->phydev;
phydev->dev_flags = phy_flags;
ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
@@ -383,6 +382,8 @@
}
}
+ priv->phydev = phydev;
+
/* Configure port multiplexer based on what the probed PHY device since
* reading the 'max-speed' property determines the maximum supported
* PHY speed which is needed for bcmgenet_mii_config() to configure
@@ -390,7 +391,7 @@
*/
ret = bcmgenet_mii_config(dev);
if (ret) {
- phy_disconnect(phydev);
+ phy_disconnect(priv->phydev);
return ret;
}
@@ -400,7 +401,7 @@
* Ethernet MAC ISRs
*/
if (priv->internal_phy)
- phydev->irq = PHY_IGNORE_INTERRUPT;
+ priv->phydev->irq = PHY_IGNORE_INTERRUPT;
return 0;
}
@@ -605,6 +606,7 @@
}
+ priv->phydev = phydev;
priv->phy_interface = pd->phy_interface;
return 0;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a2551bc..ea967df 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -18122,14 +18122,14 @@
rtnl_lock();
- /* We needn't recover from permanent error */
- if (state == pci_channel_io_frozen)
- tp->pcierr_recovery = true;
-
/* We probably don't have netdev yet */
if (!netdev || !netif_running(netdev))
goto done;
+ /* We needn't recover from permanent error */
+ if (state == pci_channel_io_frozen)
+ tp->pcierr_recovery = true;
+
tg3_phy_stop(tp);
tg3_netif_stop(tp);
@@ -18226,7 +18226,7 @@
rtnl_lock();
- if (!netif_running(netdev))
+ if (!netdev || !netif_running(netdev))
goto done;
tg3_full_lock(tp, 0);
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 0e4fdc3..31f61a7 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -31,15 +31,10 @@
#define BNAD_NUM_TXF_COUNTERS 12
#define BNAD_NUM_RXF_COUNTERS 10
#define BNAD_NUM_CQ_COUNTERS (3 + 5)
-#define BNAD_NUM_RXQ_COUNTERS 6
+#define BNAD_NUM_RXQ_COUNTERS 7
#define BNAD_NUM_TXQ_COUNTERS 5
-#define BNAD_ETHTOOL_STATS_NUM \
- (sizeof(struct rtnl_link_stats64) / sizeof(u64) + \
- sizeof(struct bnad_drv_stats) / sizeof(u64) + \
- offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64))
-
-static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
+static const char *bnad_net_stats_strings[] = {
"rx_packets",
"tx_packets",
"rx_bytes",
@@ -50,22 +45,10 @@
"tx_dropped",
"multicast",
"collisions",
-
"rx_length_errors",
- "rx_over_errors",
"rx_crc_errors",
"rx_frame_errors",
- "rx_fifo_errors",
- "rx_missed_errors",
-
- "tx_aborted_errors",
- "tx_carrier_errors",
"tx_fifo_errors",
- "tx_heartbeat_errors",
- "tx_window_errors",
-
- "rx_compressed",
- "tx_compressed",
"netif_queue_stop",
"netif_queue_wakeup",
@@ -254,6 +237,8 @@
"fc_tx_fid_parity_errors",
};
+#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings)
+
static int
bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
@@ -658,6 +643,8 @@
string += ETH_GSTRING_LEN;
sprintf(string, "rxq%d_allocbuf_failed", q_num);
string += ETH_GSTRING_LEN;
+ sprintf(string, "rxq%d_mapbuf_failed", q_num);
+ string += ETH_GSTRING_LEN;
sprintf(string, "rxq%d_producer_index", q_num);
string += ETH_GSTRING_LEN;
sprintf(string, "rxq%d_consumer_index", q_num);
@@ -678,6 +665,9 @@
sprintf(string, "rxq%d_allocbuf_failed",
q_num);
string += ETH_GSTRING_LEN;
+ sprintf(string, "rxq%d_mapbuf_failed",
+ q_num);
+ string += ETH_GSTRING_LEN;
sprintf(string, "rxq%d_producer_index",
q_num);
string += ETH_GSTRING_LEN;
@@ -854,9 +844,9 @@
u64 *buf)
{
struct bnad *bnad = netdev_priv(netdev);
- int i, j, bi;
+ int i, j, bi = 0;
unsigned long flags;
- struct rtnl_link_stats64 *net_stats64;
+ struct rtnl_link_stats64 net_stats64;
u64 *stats64;
u32 bmap;
@@ -871,14 +861,25 @@
* under the same lock
*/
spin_lock_irqsave(&bnad->bna_lock, flags);
- bi = 0;
- memset(buf, 0, stats->n_stats * sizeof(u64));
- net_stats64 = (struct rtnl_link_stats64 *)buf;
- bnad_netdev_qstats_fill(bnad, net_stats64);
- bnad_netdev_hwstats_fill(bnad, net_stats64);
+ memset(&net_stats64, 0, sizeof(net_stats64));
+ bnad_netdev_qstats_fill(bnad, &net_stats64);
+ bnad_netdev_hwstats_fill(bnad, &net_stats64);
- bi = sizeof(*net_stats64) / sizeof(u64);
+ buf[bi++] = net_stats64.rx_packets;
+ buf[bi++] = net_stats64.tx_packets;
+ buf[bi++] = net_stats64.rx_bytes;
+ buf[bi++] = net_stats64.tx_bytes;
+ buf[bi++] = net_stats64.rx_errors;
+ buf[bi++] = net_stats64.tx_errors;
+ buf[bi++] = net_stats64.rx_dropped;
+ buf[bi++] = net_stats64.tx_dropped;
+ buf[bi++] = net_stats64.multicast;
+ buf[bi++] = net_stats64.collisions;
+ buf[bi++] = net_stats64.rx_length_errors;
+ buf[bi++] = net_stats64.rx_crc_errors;
+ buf[bi++] = net_stats64.rx_frame_errors;
+ buf[bi++] = net_stats64.tx_fifo_errors;
/* Get netif_queue_stopped from stack */
bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 2e2aa9f..edd2338 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -419,8 +419,8 @@
unsigned short supported; /* link capabilities */
unsigned short advertising; /* advertised capabilities */
unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned short requested_speed; /* speed user has requested */
- unsigned short speed; /* actual link speed */
+ unsigned int requested_speed; /* speed user has requested */
+ unsigned int speed; /* actual link speed */
unsigned char requested_fc; /* flow control user has requested */
unsigned char fc; /* actual link flow control */
unsigned char autoneg; /* autonegotiating? */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index c762a8c..3ceafb55 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4305,10 +4305,17 @@
.resume = eeh_resume,
};
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
static inline bool is_x_10g_port(const struct link_config *lc)
{
- return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
- (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+ unsigned int speeds, high_speeds;
+
+ speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+ high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+ return high_speeds != 0;
}
static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
@@ -4756,8 +4763,12 @@
bufp += sprintf(bufp, "1000/");
if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
bufp += sprintf(bufp, "10G/");
+ if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
+ bufp += sprintf(bufp, "25G/");
if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
bufp += sprintf(bufp, "40G/");
+ if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
+ bufp += sprintf(bufp, "100G/");
if (bufp != buf)
--bufp;
sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index dc92c80..660204b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3627,7 +3627,8 @@
}
#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
+ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+ FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
FW_PORT_CAP_ANEG)
/**
@@ -7196,8 +7197,12 @@
speed = 1000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
speed = 10000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ speed = 25000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
speed = 40000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ speed = 100000;
lc = &pi->link_cfg;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index a89b307..30507d4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2265,6 +2265,12 @@
FW_PORT_CAP_802_3_ASM_DIR = 0x8000,
};
+#define FW_PORT_CAP_SPEED_S 0
+#define FW_PORT_CAP_SPEED_M 0x3f
+#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S)
+#define FW_PORT_CAP_SPEED_G(x) \
+ (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
+
enum fw_port_mdi {
FW_PORT_CAP_MDI_UNCHANGED,
FW_PORT_CAP_MDI_AUTO,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 8ee5414..17a2bbc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -108,8 +108,8 @@
unsigned int supported; /* link capabilities */
unsigned int advertising; /* advertised capabilities */
unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned short requested_speed; /* speed user has requested */
- unsigned short speed; /* actual link speed */
+ unsigned int requested_speed; /* speed user has requested */
+ unsigned int speed; /* actual link speed */
unsigned char requested_fc; /* flow control user has requested */
unsigned char fc; /* actual link flow control */
unsigned char autoneg; /* autonegotiating? */
@@ -271,10 +271,17 @@
return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
}
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
static inline bool is_x_10g_port(const struct link_config *lc)
{
- return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
- (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+ unsigned int speeds, high_speeds;
+
+ speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+ high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+ return high_speeds != 0;
}
static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 427bfa7..b5622b1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -314,8 +314,9 @@
}
#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
- FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
+ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+ FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
+ FW_PORT_CAP_ANEG)
/**
* init_link_config - initialize a link's SW state
@@ -1712,8 +1713,12 @@
speed = 1000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
speed = 10000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ speed = 25000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
speed = 40000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ speed = 100000;
/*
* Scan all of our "ports" (Virtual Interfaces) looking for
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 01f7e81..692ee24 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -89,10 +89,10 @@
.driver_data = 0,
}, {
.name = "imx25-fec",
- .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
+ .driver_data = FEC_QUIRK_USE_GASKET,
}, {
.name = "imx27-fec",
- .driver_data = FEC_QUIRK_HAS_RACC,
+ .driver_data = 0,
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
@@ -180,6 +180,7 @@
/* FEC receive acceleration */
#define FEC_RACC_IPDIS (1 << 1)
#define FEC_RACC_PRODIS (1 << 2)
+#define FEC_RACC_SHIFT16 BIT(7)
#define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS)
/*
@@ -945,9 +946,11 @@
#if !defined(CONFIG_M5272)
if (fep->quirks & FEC_QUIRK_HAS_RACC) {
- /* set RX checksum */
val = readl(fep->hwp + FEC_RACC);
+ /* align IP header */
+ val |= FEC_RACC_SHIFT16;
if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+ /* set RX checksum */
val |= FEC_RACC_OPTIONS;
else
val &= ~FEC_RACC_OPTIONS;
@@ -1428,6 +1431,12 @@
prefetch(skb->data - NET_IP_ALIGN);
skb_put(skb, pkt_len - 4);
data = skb->data;
+
+#if !defined(CONFIG_M5272)
+ if (fep->quirks & FEC_QUIRK_HAS_RACC)
+ data = skb_pull_inline(skb, 2);
+#endif
+
if (!is_copybreak && need_swap)
swap_buffer(data, pkt_len);
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 4c9771d..7af09cb 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -977,7 +977,37 @@
dev->mcast_pending = 1;
return;
}
+
+ mutex_lock(&dev->link_lock);
__emac_set_multicast_list(dev);
+ mutex_unlock(&dev->link_lock);
+}
+
+static int emac_set_mac_address(struct net_device *ndev, void *sa)
+{
+ struct emac_instance *dev = netdev_priv(ndev);
+ struct sockaddr *addr = sa;
+ struct emac_regs __iomem *p = dev->emacp;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ mutex_lock(&dev->link_lock);
+
+ memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
+
+ emac_rx_disable(dev);
+ emac_tx_disable(dev);
+ out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]);
+ out_be32(&p->ialr, (ndev->dev_addr[2] << 24) |
+ (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) |
+ ndev->dev_addr[5]);
+ emac_tx_enable(dev);
+ emac_rx_enable(dev);
+
+ mutex_unlock(&dev->link_lock);
+
+ return 0;
}
static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
@@ -2686,7 +2716,7 @@
.ndo_do_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = emac_set_mac_address,
.ndo_start_xmit = emac_start_xmit,
.ndo_change_mtu = eth_change_mtu,
};
@@ -2699,7 +2729,7 @@
.ndo_do_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = emac_set_mac_address,
.ndo_start_xmit = emac_start_xmit_sg,
.ndo_change_mtu = emac_change_mtu,
};
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d41c28d..b745487 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -382,7 +382,8 @@
struct mvneta_rx_queue *rxqs;
struct mvneta_tx_queue *txqs;
struct net_device *dev;
- struct notifier_block cpu_notifier;
+ struct hlist_node node_online;
+ struct hlist_node node_dead;
int rxq_def;
/* Protect the access to the percpu interrupt registers,
* ensuring that the configuration remains coherent.
@@ -574,6 +575,7 @@
int next_desc_to_proc;
};
+static enum cpuhp_state online_hpstate;
/* The hardware supports eight (8) rx queues, but we are only allowing
* the first one to be used. Therefore, let's just allocate one queue.
*/
@@ -3311,101 +3313,104 @@
}
};
-static int mvneta_percpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct mvneta_port *pp = container_of(nfb, struct mvneta_port,
- cpu_notifier);
- int cpu = (unsigned long)hcpu, other_cpu;
+ int other_cpu;
+ struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+ node_online);
struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- spin_lock(&pp->lock);
- /* Configuring the driver for a new CPU while the
- * driver is stopping is racy, so just avoid it.
- */
- if (pp->is_stopped) {
- spin_unlock(&pp->lock);
- break;
+
+ spin_lock(&pp->lock);
+ /*
+ * Configuring the driver for a new CPU while the driver is
+ * stopping is racy, so just avoid it.
+ */
+ if (pp->is_stopped) {
+ spin_unlock(&pp->lock);
+ return 0;
+ }
+ netif_tx_stop_all_queues(pp->dev);
+
+ /*
+ * We have to synchronise on tha napi of each CPU except the one
+ * just being woken up
+ */
+ for_each_online_cpu(other_cpu) {
+ if (other_cpu != cpu) {
+ struct mvneta_pcpu_port *other_port =
+ per_cpu_ptr(pp->ports, other_cpu);
+
+ napi_synchronize(&other_port->napi);
}
- netif_tx_stop_all_queues(pp->dev);
-
- /* We have to synchronise on tha napi of each CPU
- * except the one just being waked up
- */
- for_each_online_cpu(other_cpu) {
- if (other_cpu != cpu) {
- struct mvneta_pcpu_port *other_port =
- per_cpu_ptr(pp->ports, other_cpu);
-
- napi_synchronize(&other_port->napi);
- }
- }
-
- /* Mask all ethernet port interrupts */
- on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
- napi_enable(&port->napi);
-
-
- /* Enable per-CPU interrupts on the CPU that is
- * brought up.
- */
- mvneta_percpu_enable(pp);
-
- /* Enable per-CPU interrupt on the one CPU we care
- * about.
- */
- mvneta_percpu_elect(pp);
-
- /* Unmask all ethernet port interrupts */
- on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
- mvreg_write(pp, MVNETA_INTR_MISC_MASK,
- MVNETA_CAUSE_PHY_STATUS_CHANGE |
- MVNETA_CAUSE_LINK_CHANGE |
- MVNETA_CAUSE_PSC_SYNC_CHANGE);
- netif_tx_start_all_queues(pp->dev);
- spin_unlock(&pp->lock);
- break;
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- netif_tx_stop_all_queues(pp->dev);
- /* Thanks to this lock we are sure that any pending
- * cpu election is done
- */
- spin_lock(&pp->lock);
- /* Mask all ethernet port interrupts */
- on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
- spin_unlock(&pp->lock);
-
- napi_synchronize(&port->napi);
- napi_disable(&port->napi);
- /* Disable per-CPU interrupts on the CPU that is
- * brought down.
- */
- mvneta_percpu_disable(pp);
-
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /* Check if a new CPU must be elected now this on is down */
- spin_lock(&pp->lock);
- mvneta_percpu_elect(pp);
- spin_unlock(&pp->lock);
- /* Unmask all ethernet port interrupts */
- on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
- mvreg_write(pp, MVNETA_INTR_MISC_MASK,
- MVNETA_CAUSE_PHY_STATUS_CHANGE |
- MVNETA_CAUSE_LINK_CHANGE |
- MVNETA_CAUSE_PSC_SYNC_CHANGE);
- netif_tx_start_all_queues(pp->dev);
- break;
}
- return NOTIFY_OK;
+ /* Mask all ethernet port interrupts */
+ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+ napi_enable(&port->napi);
+
+ /*
+ * Enable per-CPU interrupts on the CPU that is
+ * brought up.
+ */
+ mvneta_percpu_enable(pp);
+
+ /*
+ * Enable per-CPU interrupt on the one CPU we care
+ * about.
+ */
+ mvneta_percpu_elect(pp);
+
+ /* Unmask all ethernet port interrupts */
+ on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+ mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+ MVNETA_CAUSE_PHY_STATUS_CHANGE |
+ MVNETA_CAUSE_LINK_CHANGE |
+ MVNETA_CAUSE_PSC_SYNC_CHANGE);
+ netif_tx_start_all_queues(pp->dev);
+ spin_unlock(&pp->lock);
+ return 0;
+}
+
+static int mvneta_cpu_down_prepare(unsigned int cpu, struct hlist_node *node)
+{
+ struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+ node_online);
+ struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+
+ /*
+ * Thanks to this lock we are sure that any pending cpu election is
+ * done.
+ */
+ spin_lock(&pp->lock);
+ /* Mask all ethernet port interrupts */
+ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+ spin_unlock(&pp->lock);
+
+ napi_synchronize(&port->napi);
+ napi_disable(&port->napi);
+ /* Disable per-CPU interrupts on the CPU that is brought down. */
+ mvneta_percpu_disable(pp);
+ return 0;
+}
+
+static int mvneta_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+ node_dead);
+
+ /* Check if a new CPU must be elected now this on is down */
+ spin_lock(&pp->lock);
+ mvneta_percpu_elect(pp);
+ spin_unlock(&pp->lock);
+ /* Unmask all ethernet port interrupts */
+ on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+ mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+ MVNETA_CAUSE_PHY_STATUS_CHANGE |
+ MVNETA_CAUSE_LINK_CHANGE |
+ MVNETA_CAUSE_PSC_SYNC_CHANGE);
+ netif_tx_start_all_queues(pp->dev);
+ return 0;
}
static int mvneta_open(struct net_device *dev)
@@ -3442,7 +3447,15 @@
/* Register a CPU notifier to handle the case where our CPU
* might be taken offline.
*/
- register_cpu_notifier(&pp->cpu_notifier);
+ ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+ &pp->node_online);
+ if (ret)
+ goto err_free_irq;
+
+ ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+ &pp->node_dead);
+ if (ret)
+ goto err_free_online_hp;
/* In default link is down */
netif_carrier_off(pp->dev);
@@ -3450,15 +3463,19 @@
ret = mvneta_mdio_probe(pp);
if (ret < 0) {
netdev_err(dev, "cannot probe MDIO bus\n");
- goto err_free_irq;
+ goto err_free_dead_hp;
}
mvneta_start_dev(pp);
return 0;
+err_free_dead_hp:
+ cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+ &pp->node_dead);
+err_free_online_hp:
+ cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
err_free_irq:
- unregister_cpu_notifier(&pp->cpu_notifier);
on_each_cpu(mvneta_percpu_disable, pp, true);
free_percpu_irq(pp->dev->irq, pp->ports);
err_cleanup_txqs:
@@ -3484,7 +3501,10 @@
mvneta_stop_dev(pp);
mvneta_mdio_remove(pp);
- unregister_cpu_notifier(&pp->cpu_notifier);
+
+ cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+ cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+ &pp->node_dead);
on_each_cpu(mvneta_percpu_disable, pp, true);
free_percpu_irq(dev->irq, pp->ports);
mvneta_cleanup_rxqs(pp);
@@ -4024,7 +4044,6 @@
err = of_property_read_string(dn, "managed", &managed);
pp->use_inband_status = (err == 0 &&
strcmp(managed, "in-band-status") == 0);
- pp->cpu_notifier.notifier_call = mvneta_percpu_notifier;
pp->rxq_def = rxq_def;
@@ -4227,7 +4246,42 @@
},
};
-module_platform_driver(mvneta_driver);
+static int __init mvneta_driver_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvmeta:online",
+ mvneta_cpu_online,
+ mvneta_cpu_down_prepare);
+ if (ret < 0)
+ goto out;
+ online_hpstate = ret;
+ ret = cpuhp_setup_state_multi(CPUHP_NET_MVNETA_DEAD, "net/mvneta:dead",
+ NULL, mvneta_cpu_dead);
+ if (ret)
+ goto err_dead;
+
+ ret = platform_driver_register(&mvneta_driver);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+err_dead:
+ cpuhp_remove_multi_state(online_hpstate);
+out:
+ return ret;
+}
+module_init(mvneta_driver_init);
+
+static void __exit mvneta_driver_exit(void)
+{
+ platform_driver_unregister(&mvneta_driver);
+ cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+ cpuhp_remove_multi_state(online_hpstate);
+}
+module_exit(mvneta_driver_exit);
MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index d919915..3743af8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1923,6 +1923,7 @@
{ .compatible = "mediatek,mt7623-eth" },
{},
};
+MODULE_DEVICE_TABLE(of, of_mtk_match);
static struct platform_driver mtk_driver = {
.probe = mtk_probe,
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index f613977..cf8f8a7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1305,8 +1305,8 @@
return 0;
err_out_unmap:
- while (i >= 0)
- mlx4_free_eq(dev, &priv->eq_table.eq[i--]);
+ while (i > 0)
+ mlx4_free_eq(dev, &priv->eq_table.eq[--i]);
#ifdef CONFIG_RFS_ACCEL
for (i = 1; i <= dev->caps.num_ports; i++) {
if (mlx4_priv(dev)->port[i].rmap) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 75dd2e3..7183ac4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2970,6 +2970,7 @@
mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
device_remove_file(&info->dev->persist->pdev->dev,
&info->port_attr);
+ devlink_port_unregister(&info->devlink_port);
info->port = -1;
}
@@ -2984,6 +2985,8 @@
device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
device_remove_file(&info->dev->persist->pdev->dev,
&info->port_mtu_attr);
+ devlink_port_unregister(&info->devlink_port);
+
#ifdef CONFIG_RFS_ACCEL
free_irq_cpu_rmap(info->rmap);
info->rmap = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 8b78f15..b247949 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1554,6 +1554,7 @@
abort:
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ esw->mode = SRIOV_NONE;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3dc83a9..7de40e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -446,7 +446,7 @@
static int esw_offloads_start(struct mlx5_eswitch *esw)
{
- int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
if (esw->mode != SRIOV_LEGACY) {
esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
@@ -455,8 +455,12 @@
mlx5_eswitch_disable_sriov(esw);
err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
- if (err)
- esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
+ }
return err;
}
@@ -508,12 +512,16 @@
static int esw_offloads_stop(struct mlx5_eswitch *esw)
{
- int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
mlx5_eswitch_disable_sriov(esw);
err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
- if (err)
- esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+ }
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9134010..287ade1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -425,11 +425,11 @@
mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
{
struct mlx5_cmd_fc_bulk *b;
- int outlen = sizeof(*b) +
+ int outlen =
MLX5_ST_SZ_BYTES(query_flow_counter_out) +
MLX5_ST_SZ_BYTES(traffic_counter) * num;
- b = kzalloc(outlen, GFP_KERNEL);
+ b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
if (!b)
return NULL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 252e492..39dadfc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2044,12 +2044,16 @@
nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
GFP_KERNEL);
- if (!nn->rx_rings)
+ if (!nn->rx_rings) {
+ err = -ENOMEM;
goto err_free_lsc;
+ }
nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
GFP_KERNEL);
- if (!nn->tx_rings)
+ if (!nn->tx_rings) {
+ err = -ENOMEM;
goto err_free_rx_rings;
+ }
for (r = 0; r < nn->num_r_vecs; r++) {
err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index a240f26..f776a77 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1153,8 +1153,8 @@
p_drv_version = &union_data.drv_version;
p_drv_version->version = p_ver->version;
- for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
- val = cpu_to_be32(p_ver->name[i]);
+ for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
+ val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
*(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index cbefe9e..885a5e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -261,7 +261,7 @@
}
if (mode & WAKE_UCAST) {
pr_debug("GMAC: WOL on global unicast\n");
- pmt |= global_unicast;
+ pmt |= power_down | global_unicast | wake_up_frame_en;
}
writel(pmt, ioaddr + GMAC_PMT);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index df5580d..51019b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -102,7 +102,7 @@
}
if (mode & WAKE_UCAST) {
pr_debug("GMAC: WOL on global unicast\n");
- pmt |= global_unicast;
+ pmt |= power_down | global_unicast | wake_up_frame_en;
}
writel(pmt, ioaddr + GMAC_PMT);
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index 7756748..92af182 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -424,10 +424,8 @@
mdiobus_unregister(mdio_bus);
mdiobus_free(mdio_bus);
- if (dev->of_node) {
- if (IS_ERR(pdata->clk))
- clk_disable_unprepare(pdata->clk);
- }
+ if (dev->of_node)
+ clk_disable_unprepare(pdata->clk);
return 0;
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f41a8ad..c254248 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
#define NETNEXT_VERSION "08"
/* Information for net */
-#define NET_VERSION "5"
+#define NET_VERSION "6"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -2552,6 +2552,77 @@
}
}
+static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
+{
+ ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
+ ocp_reg_write(tp, OCP_EEE_DATA, reg);
+ ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
+}
+
+static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
+{
+ u16 data;
+
+ r8152_mmd_indirect(tp, dev, reg);
+ data = ocp_reg_read(tp, OCP_EEE_DATA);
+ ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+
+ return data;
+}
+
+static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
+{
+ r8152_mmd_indirect(tp, dev, reg);
+ ocp_reg_write(tp, OCP_EEE_DATA, data);
+ ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+}
+
+static void r8152_eee_en(struct r8152 *tp, bool enable)
+{
+ u16 config1, config2, config3;
+ u32 ocp_data;
+
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+ config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
+ config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
+ config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
+
+ if (enable) {
+ ocp_data |= EEE_RX_EN | EEE_TX_EN;
+ config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
+ config1 |= sd_rise_time(1);
+ config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
+ config3 |= fast_snr(42);
+ } else {
+ ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+ config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
+ RX_QUIET_EN);
+ config1 |= sd_rise_time(7);
+ config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
+ config3 |= fast_snr(511);
+ }
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+ ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
+ ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
+ ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
+}
+
+static void r8152b_enable_eee(struct r8152 *tp)
+{
+ r8152_eee_en(tp, true);
+ r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
+}
+
+static void r8152b_enable_fc(struct r8152 *tp)
+{
+ u16 anar;
+
+ anar = r8152_mdio_read(tp, MII_ADVERTISE);
+ anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+ r8152_mdio_write(tp, MII_ADVERTISE, anar);
+}
+
static void rtl8152_disable(struct r8152 *tp)
{
r8152_aldps_en(tp, false);
@@ -2561,13 +2632,9 @@
static void r8152b_hw_phy_cfg(struct r8152 *tp)
{
- u16 data;
-
- data = r8152_mdio_read(tp, MII_BMCR);
- if (data & BMCR_PDOWN) {
- data &= ~BMCR_PDOWN;
- r8152_mdio_write(tp, MII_BMCR, data);
- }
+ r8152b_enable_eee(tp);
+ r8152_aldps_en(tp, true);
+ r8152b_enable_fc(tp);
set_bit(PHY_RESET, &tp->flags);
}
@@ -2701,20 +2768,52 @@
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
+static void r8153_aldps_en(struct r8152 *tp, bool enable)
+{
+ u16 data;
+
+ data = ocp_reg_read(tp, OCP_POWER_CFG);
+ if (enable) {
+ data |= EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ } else {
+ data &= ~EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ msleep(20);
+ }
+}
+
+static void r8153_eee_en(struct r8152 *tp, bool enable)
+{
+ u32 ocp_data;
+ u16 config;
+
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+ config = ocp_reg_read(tp, OCP_EEE_CFG);
+
+ if (enable) {
+ ocp_data |= EEE_RX_EN | EEE_TX_EN;
+ config |= EEE10_EN;
+ } else {
+ ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+ config &= ~EEE10_EN;
+ }
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+ ocp_reg_write(tp, OCP_EEE_CFG, config);
+}
+
static void r8153_hw_phy_cfg(struct r8152 *tp)
{
u32 ocp_data;
u16 data;
- if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
- tp->version == RTL_VER_05)
- ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+ /* disable ALDPS before updating the PHY parameters */
+ r8153_aldps_en(tp, false);
- data = r8152_mdio_read(tp, MII_BMCR);
- if (data & BMCR_PDOWN) {
- data &= ~BMCR_PDOWN;
- r8152_mdio_write(tp, MII_BMCR, data);
- }
+ /* disable EEE before updating the PHY parameters */
+ r8153_eee_en(tp, false);
+ ocp_reg_write(tp, OCP_EEE_ADV, 0);
if (tp->version == RTL_VER_03) {
data = ocp_reg_read(tp, OCP_EEE_CFG);
@@ -2745,6 +2844,12 @@
sram_write(tp, SRAM_10M_AMP1, 0x00af);
sram_write(tp, SRAM_10M_AMP2, 0x0208);
+ r8153_eee_en(tp, true);
+ ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
+
+ r8153_aldps_en(tp, true);
+ r8152b_enable_fc(tp);
+
set_bit(PHY_RESET, &tp->flags);
}
@@ -2866,21 +2971,6 @@
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
-static void r8153_aldps_en(struct r8152 *tp, bool enable)
-{
- u16 data;
-
- data = ocp_reg_read(tp, OCP_POWER_CFG);
- if (enable) {
- data |= EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
- } else {
- data &= ~EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
- msleep(20);
- }
-}
-
static void rtl8153_disable(struct r8152 *tp)
{
r8153_aldps_en(tp, false);
@@ -3246,103 +3336,6 @@
return res;
}
-static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
-{
- ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
- ocp_reg_write(tp, OCP_EEE_DATA, reg);
- ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
-}
-
-static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
-{
- u16 data;
-
- r8152_mmd_indirect(tp, dev, reg);
- data = ocp_reg_read(tp, OCP_EEE_DATA);
- ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-
- return data;
-}
-
-static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
-{
- r8152_mmd_indirect(tp, dev, reg);
- ocp_reg_write(tp, OCP_EEE_DATA, data);
- ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-}
-
-static void r8152_eee_en(struct r8152 *tp, bool enable)
-{
- u16 config1, config2, config3;
- u32 ocp_data;
-
- ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
- config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
- config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
- config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
-
- if (enable) {
- ocp_data |= EEE_RX_EN | EEE_TX_EN;
- config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
- config1 |= sd_rise_time(1);
- config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
- config3 |= fast_snr(42);
- } else {
- ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
- config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
- RX_QUIET_EN);
- config1 |= sd_rise_time(7);
- config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
- config3 |= fast_snr(511);
- }
-
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
- ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
- ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
- ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
-}
-
-static void r8152b_enable_eee(struct r8152 *tp)
-{
- r8152_eee_en(tp, true);
- r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
-}
-
-static void r8153_eee_en(struct r8152 *tp, bool enable)
-{
- u32 ocp_data;
- u16 config;
-
- ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
- config = ocp_reg_read(tp, OCP_EEE_CFG);
-
- if (enable) {
- ocp_data |= EEE_RX_EN | EEE_TX_EN;
- config |= EEE10_EN;
- } else {
- ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
- config &= ~EEE10_EN;
- }
-
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
- ocp_reg_write(tp, OCP_EEE_CFG, config);
-}
-
-static void r8153_enable_eee(struct r8152 *tp)
-{
- r8153_eee_en(tp, true);
- ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
-}
-
-static void r8152b_enable_fc(struct r8152 *tp)
-{
- u16 anar;
-
- anar = r8152_mdio_read(tp, MII_ADVERTISE);
- anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
- r8152_mdio_write(tp, MII_ADVERTISE, anar);
-}
-
static void rtl_tally_reset(struct r8152 *tp)
{
u32 ocp_data;
@@ -3355,10 +3348,17 @@
static void r8152b_init(struct r8152 *tp)
{
u32 ocp_data;
+ u16 data;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
+ data = r8152_mdio_read(tp, MII_BMCR);
+ if (data & BMCR_PDOWN) {
+ data &= ~BMCR_PDOWN;
+ r8152_mdio_write(tp, MII_BMCR, data);
+ }
+
r8152_aldps_en(tp, false);
if (tp->version == RTL_VER_01) {
@@ -3380,9 +3380,6 @@
SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
- r8152b_enable_eee(tp);
- r8152_aldps_en(tp, true);
- r8152b_enable_fc(tp);
rtl_tally_reset(tp);
/* enable rx aggregation */
@@ -3394,12 +3391,12 @@
static void r8153_init(struct r8152 *tp)
{
u32 ocp_data;
+ u16 data;
int i;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
- r8153_aldps_en(tp, false);
r8153_u1u2en(tp, false);
for (i = 0; i < 500; i++) {
@@ -3416,6 +3413,23 @@
msleep(20);
}
+ if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
+ tp->version == RTL_VER_05)
+ ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+
+ data = r8152_mdio_read(tp, MII_BMCR);
+ if (data & BMCR_PDOWN) {
+ data &= ~BMCR_PDOWN;
+ r8152_mdio_write(tp, MII_BMCR, data);
+ }
+
+ for (i = 0; i < 500; i++) {
+ ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
+ if (ocp_data == PHY_STAT_LAN_ON)
+ break;
+ msleep(20);
+ }
+
usb_disable_lpm(tp->udev);
r8153_u2p3en(tp, false);
@@ -3483,9 +3497,6 @@
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
- r8153_enable_eee(tp);
- r8153_aldps_en(tp, true);
- r8152b_enable_fc(tp);
rtl_tally_reset(tp);
r8153_u2p3en(tp, true);
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1b5f531..fad84f3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -138,8 +138,9 @@
/* Does the affinity hint is set for virtqueues? */
bool affinity_hint_set;
- /* CPU hot plug notifier */
- struct notifier_block nb;
+ /* CPU hotplug instances for online & dead */
+ struct hlist_node node;
+ struct hlist_node node_dead;
/* Control VQ buffers: protected by the rtnl lock */
struct virtio_net_ctrl_hdr ctrl_hdr;
@@ -1237,25 +1238,53 @@
vi->affinity_hint_set = true;
}
-static int virtnet_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
+ struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+ node);
+ virtnet_set_affinity(vi);
+ return 0;
+}
- switch(action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- case CPU_DEAD:
- virtnet_set_affinity(vi);
- break;
- case CPU_DOWN_PREPARE:
- virtnet_clean_affinity(vi, (long)hcpu);
- break;
- default:
- break;
- }
+static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+ node_dead);
+ virtnet_set_affinity(vi);
+ return 0;
+}
- return NOTIFY_OK;
+static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
+{
+ struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+ node);
+
+ virtnet_clean_affinity(vi, cpu);
+ return 0;
+}
+
+static enum cpuhp_state virtionet_online;
+
+static int virtnet_cpu_notif_add(struct virtnet_info *vi)
+{
+ int ret;
+
+ ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
+ if (ret)
+ return ret;
+ ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
+ &vi->node_dead);
+ if (!ret)
+ return ret;
+ cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
+ return ret;
+}
+
+static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
+{
+ cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
+ &vi->node_dead);
}
static void virtnet_get_ringparam(struct net_device *dev,
@@ -1879,8 +1908,7 @@
virtio_device_ready(vdev);
- vi->nb.notifier_call = &virtnet_cpu_callback;
- err = register_hotcpu_notifier(&vi->nb);
+ err = virtnet_cpu_notif_add(vi);
if (err) {
pr_debug("virtio_net: registering cpu notifier failed\n");
goto free_unregister_netdev;
@@ -1934,7 +1962,7 @@
{
struct virtnet_info *vi = vdev->priv;
- unregister_hotcpu_notifier(&vi->nb);
+ virtnet_cpu_notif_remove(vi);
/* Make sure no work handler is accessing the device. */
flush_work(&vi->config_work);
@@ -1953,7 +1981,7 @@
struct virtnet_info *vi = vdev->priv;
int i;
- unregister_hotcpu_notifier(&vi->nb);
+ virtnet_cpu_notif_remove(vi);
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
@@ -1997,7 +2025,7 @@
virtnet_set_queues(vi, vi->curr_queue_pairs);
rtnl_unlock();
- err = register_hotcpu_notifier(&vi->nb);
+ err = virtnet_cpu_notif_add(vi);
if (err)
return err;
@@ -2039,7 +2067,41 @@
#endif
};
-module_virtio_driver(virtio_net_driver);
+static __init int virtio_net_driver_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "AP_VIRT_NET_ONLINE",
+ virtnet_cpu_online,
+ virtnet_cpu_down_prep);
+ if (ret < 0)
+ goto out;
+ virtionet_online = ret;
+ ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "VIRT_NET_DEAD",
+ NULL, virtnet_cpu_dead);
+ if (ret)
+ goto err_dead;
+
+ ret = register_virtio_driver(&virtio_net_driver);
+ if (ret)
+ goto err_virtio;
+ return 0;
+err_virtio:
+ cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
+err_dead:
+ cpuhp_remove_multi_state(virtionet_online);
+out:
+ return ret;
+}
+module_init(virtio_net_driver_init);
+
+static __exit void virtio_net_driver_exit(void)
+{
+ cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
+ cpuhp_remove_multi_state(virtionet_online);
+ unregister_virtio_driver(&virtio_net_driver);
+}
+module_exit(virtio_net_driver_exit);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c
index 6808db4..ec3a64e 100644
--- a/drivers/net/wireless/ath/carl9170/debug.c
+++ b/drivers/net/wireless/ath/carl9170/debug.c
@@ -75,7 +75,8 @@
if (!ar)
return -ENODEV;
- dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct carl9170_debugfs_fops, fops);
if (!dfops->read)
return -ENOSYS;
@@ -127,7 +128,8 @@
if (!ar)
return -ENODEV;
- dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct carl9170_debugfs_fops, fops);
if (!dfops->write)
return -ENOSYS;
diff --git a/drivers/net/wireless/broadcom/b43/debugfs.c b/drivers/net/wireless/broadcom/b43/debugfs.c
index b4bcd94..7704638 100644
--- a/drivers/net/wireless/broadcom/b43/debugfs.c
+++ b/drivers/net/wireless/broadcom/b43/debugfs.c
@@ -524,7 +524,8 @@
goto out_unlock;
}
- dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct b43_debugfs_fops, fops);
if (!dfops->read) {
err = -ENOSYS;
goto out_unlock;
@@ -585,7 +586,8 @@
goto out_unlock;
}
- dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct b43_debugfs_fops, fops);
if (!dfops->write) {
err = -ENOSYS;
goto out_unlock;
diff --git a/drivers/net/wireless/broadcom/b43legacy/debugfs.c b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
index 090910e..82ef56e 100644
--- a/drivers/net/wireless/broadcom/b43legacy/debugfs.c
+++ b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
@@ -221,7 +221,8 @@
goto out_unlock;
}
- dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct b43legacy_debugfs_fops, fops);
if (!dfops->read) {
err = -ENOSYS;
goto out_unlock;
@@ -287,7 +288,8 @@
goto out_unlock;
}
- dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
+ dfops = container_of(debugfs_real_fops(file),
+ struct b43legacy_debugfs_fops, fops);
if (!dfops->write) {
err = -ENOSYS;
goto out_unlock;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index c6585ab..b3a87a3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -513,6 +513,15 @@
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int queue;
+ /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
+ * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
+ * queue. STATION (HS2.0) uses the auxiliary context of the FW,
+ * and hence needs to be sent on the aux queue
+ */
+ if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+ skb_info->control.vif->type == NL80211_IFTYPE_STATION)
+ IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
+
memcpy(&info, skb->cb, sizeof(info));
if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
@@ -526,16 +535,6 @@
/* This holds the amsdu headers length */
skb_info->driver_data[0] = (void *)(uintptr_t)0;
- /*
- * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
- * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
- * queue. STATION (HS2.0) uses the auxiliary context of the FW,
- * and hence needs to be sent on the aux queue
- */
- if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
- info.control.vif->type == NL80211_IFTYPE_STATION)
- IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
-
queue = info.hw_queue;
/*
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 6a31f26..daf4c78 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -271,6 +271,11 @@
be->dev = dev;
dev_set_drvdata(&dev->dev, be);
+ be->state = XenbusStateInitialising;
+ err = xenbus_switch_state(dev, XenbusStateInitialising);
+ if (err)
+ goto fail;
+
sg = 1;
do {
@@ -383,11 +388,6 @@
be->hotplug_script = script;
- err = xenbus_switch_state(dev, XenbusStateInitWait);
- if (err)
- goto fail;
-
- be->state = XenbusStateInitWait;
/* This kicks hotplug scripts, so do it immediately. */
err = backend_create_xenvif(be);
@@ -492,20 +492,20 @@
/* Handle backend state transitions:
*
- * The backend state starts in InitWait and the following transitions are
+ * The backend state starts in Initialising and the following transitions are
* allowed.
*
- * InitWait -> Connected
+ * Initialising -> InitWait -> Connected
+ * \
+ * \ ^ \ |
+ * \ | \ |
+ * \ | \ |
+ * \ | \ |
+ * \ | \ |
+ * \ | \ |
+ * V | V V
*
- * ^ \ |
- * | \ |
- * | \ |
- * | \ |
- * | \ |
- * | \ |
- * | V V
- *
- * Closed <-> Closing
+ * Closed <-> Closing
*
* The state argument specifies the eventual state of the backend and the
* function transitions to that state via the shortest path.
@@ -515,6 +515,20 @@
{
while (be->state != state) {
switch (be->state) {
+ case XenbusStateInitialising:
+ switch (state) {
+ case XenbusStateInitWait:
+ case XenbusStateConnected:
+ case XenbusStateClosing:
+ backend_switch_state(be, XenbusStateInitWait);
+ break;
+ case XenbusStateClosed:
+ backend_switch_state(be, XenbusStateClosed);
+ break;
+ default:
+ BUG();
+ }
+ break;
case XenbusStateClosed:
switch (state) {
case XenbusStateInitWait:
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 715583f..4d7bbd2 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -99,8 +99,11 @@
nvdimm_map->size = size;
kref_init(&nvdimm_map->kref);
- if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+ if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) {
+ dev_err(&nvdimm_bus->dev, "failed to request %pa + %zd for %s\n",
+ &offset, size, dev_name(dev));
goto err_request_region;
+ }
if (flags)
nvdimm_map->mem = memremap(offset, size, flags);
@@ -171,6 +174,9 @@
kref_get(&nvdimm_map->kref);
nvdimm_bus_unlock(dev);
+ if (!nvdimm_map)
+ return NULL;
+
if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
return NULL;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 8024a0e..0b78a82 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -52,10 +52,28 @@
struct nd_region_data {
int ns_count;
int ns_active;
- unsigned int flush_mask;
- void __iomem *flush_wpq[0][0];
+ unsigned int hints_shift;
+ void __iomem *flush_wpq[0];
};
+static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
+ int dimm, int hint)
+{
+ unsigned int num = 1 << ndrd->hints_shift;
+ unsigned int mask = num - 1;
+
+ return ndrd->flush_wpq[dimm * num + (hint & mask)];
+}
+
+static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm,
+ int hint, void __iomem *flush)
+{
+ unsigned int num = 1 << ndrd->hints_shift;
+ unsigned int mask = num - 1;
+
+ ndrd->flush_wpq[dimm * num + (hint & mask)] = flush;
+}
+
static inline struct nd_namespace_index *to_namespace_index(
struct nvdimm_drvdata *ndd, int i)
{
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e8d5ba7..4c0ac4a 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -38,7 +38,7 @@
dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
- for (i = 0; i < nvdimm->num_flush; i++) {
+ for (i = 0; i < (1 << ndrd->hints_shift); i++) {
struct resource *res = &nvdimm->flush_wpq[i];
unsigned long pfn = PHYS_PFN(res->start);
void __iomem *flush_page;
@@ -54,14 +54,15 @@
if (j < i)
flush_page = (void __iomem *) ((unsigned long)
- ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+ ndrd_get_flush_wpq(ndrd, dimm, j)
+ & PAGE_MASK);
else
flush_page = devm_nvdimm_ioremap(dev,
- PHYS_PFN(pfn), PAGE_SIZE);
+ PFN_PHYS(pfn), PAGE_SIZE);
if (!flush_page)
return -ENXIO;
- ndrd->flush_wpq[dimm][i] = flush_page
- + (res->start & ~PAGE_MASK);
+ ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
+ + (res->start & ~PAGE_MASK));
}
return 0;
@@ -93,7 +94,10 @@
return -ENOMEM;
dev_set_drvdata(dev, ndrd);
- ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+ if (!num_flush)
+ return 0;
+
+ ndrd->hints_shift = ilog2(num_flush);
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -900,8 +904,8 @@
*/
wmb();
for (i = 0; i < nd_region->ndr_mappings; i++)
- if (ndrd->flush_wpq[i][0])
- writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+ if (ndrd_get_flush_wpq(ndrd, i, 0))
+ writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
wmb();
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
@@ -925,7 +929,7 @@
for (i = 0; i < nd_region->ndr_mappings; i++)
/* flush hints present, flushing required */
- if (ndrd->flush_wpq[i][0])
+ if (ndrd_get_flush_wpq(ndrd, i, 0))
return 1;
/*
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c2c2c28..fbdb226 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -561,7 +561,6 @@
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
- queue->flags = 0;
init_completion(&queue->cm_done);
if (idx > 0)
@@ -595,6 +594,7 @@
goto out_destroy_cm_id;
}
+ clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
return 0;
diff --git a/drivers/nvmem/rockchip-efuse.c b/drivers/nvmem/rockchip-efuse.c
index 4d3f391..423907b 100644
--- a/drivers/nvmem/rockchip-efuse.c
+++ b/drivers/nvmem/rockchip-efuse.c
@@ -22,17 +22,29 @@
#include <linux/nvmem-provider.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
#include <linux/platform_device.h>
-#define EFUSE_A_SHIFT 6
-#define EFUSE_A_MASK 0x3ff
-#define EFUSE_PGENB BIT(3)
-#define EFUSE_LOAD BIT(2)
-#define EFUSE_STROBE BIT(1)
-#define EFUSE_CSB BIT(0)
+#define RK3288_A_SHIFT 6
+#define RK3288_A_MASK 0x3ff
+#define RK3288_PGENB BIT(3)
+#define RK3288_LOAD BIT(2)
+#define RK3288_STROBE BIT(1)
+#define RK3288_CSB BIT(0)
-#define REG_EFUSE_CTRL 0x0000
-#define REG_EFUSE_DOUT 0x0004
+#define RK3399_A_SHIFT 16
+#define RK3399_A_MASK 0x3ff
+#define RK3399_NBYTES 4
+#define RK3399_STROBSFTSEL BIT(9)
+#define RK3399_RSB BIT(7)
+#define RK3399_PD BIT(5)
+#define RK3399_PGENB BIT(3)
+#define RK3399_LOAD BIT(2)
+#define RK3399_STROBE BIT(1)
+#define RK3399_CSB BIT(0)
+
+#define REG_EFUSE_CTRL 0x0000
+#define REG_EFUSE_DOUT 0x0004
struct rockchip_efuse_chip {
struct device *dev;
@@ -40,8 +52,8 @@
struct clk *clk;
};
-static int rockchip_efuse_read(void *context, unsigned int offset,
- void *val, size_t bytes)
+static int rockchip_rk3288_efuse_read(void *context, unsigned int offset,
+ void *val, size_t bytes)
{
struct rockchip_efuse_chip *efuse = context;
u8 *buf = val;
@@ -53,27 +65,82 @@
return ret;
}
- writel(EFUSE_LOAD | EFUSE_PGENB, efuse->base + REG_EFUSE_CTRL);
+ writel(RK3288_LOAD | RK3288_PGENB, efuse->base + REG_EFUSE_CTRL);
udelay(1);
while (bytes--) {
writel(readl(efuse->base + REG_EFUSE_CTRL) &
- (~(EFUSE_A_MASK << EFUSE_A_SHIFT)),
+ (~(RK3288_A_MASK << RK3288_A_SHIFT)),
efuse->base + REG_EFUSE_CTRL);
writel(readl(efuse->base + REG_EFUSE_CTRL) |
- ((offset++ & EFUSE_A_MASK) << EFUSE_A_SHIFT),
+ ((offset++ & RK3288_A_MASK) << RK3288_A_SHIFT),
efuse->base + REG_EFUSE_CTRL);
udelay(1);
writel(readl(efuse->base + REG_EFUSE_CTRL) |
- EFUSE_STROBE, efuse->base + REG_EFUSE_CTRL);
+ RK3288_STROBE, efuse->base + REG_EFUSE_CTRL);
udelay(1);
*buf++ = readb(efuse->base + REG_EFUSE_DOUT);
writel(readl(efuse->base + REG_EFUSE_CTRL) &
- (~EFUSE_STROBE), efuse->base + REG_EFUSE_CTRL);
+ (~RK3288_STROBE), efuse->base + REG_EFUSE_CTRL);
udelay(1);
}
/* Switch to standby mode */
- writel(EFUSE_PGENB | EFUSE_CSB, efuse->base + REG_EFUSE_CTRL);
+ writel(RK3288_PGENB | RK3288_CSB, efuse->base + REG_EFUSE_CTRL);
+
+ clk_disable_unprepare(efuse->clk);
+
+ return 0;
+}
+
+static int rockchip_rk3399_efuse_read(void *context, unsigned int offset,
+ void *val, size_t bytes)
+{
+ struct rockchip_efuse_chip *efuse = context;
+ unsigned int addr_start, addr_end, addr_offset, addr_len;
+ u32 out_value;
+ u8 *buf;
+ int ret, i = 0;
+
+ ret = clk_prepare_enable(efuse->clk);
+ if (ret < 0) {
+ dev_err(efuse->dev, "failed to prepare/enable efuse clk\n");
+ return ret;
+ }
+
+ addr_start = rounddown(offset, RK3399_NBYTES) / RK3399_NBYTES;
+ addr_end = roundup(offset + bytes, RK3399_NBYTES) / RK3399_NBYTES;
+ addr_offset = offset % RK3399_NBYTES;
+ addr_len = addr_end - addr_start;
+
+ buf = kzalloc(sizeof(*buf) * addr_len * RK3399_NBYTES, GFP_KERNEL);
+ if (!buf) {
+ clk_disable_unprepare(efuse->clk);
+ return -ENOMEM;
+ }
+
+ writel(RK3399_LOAD | RK3399_PGENB | RK3399_STROBSFTSEL | RK3399_RSB,
+ efuse->base + REG_EFUSE_CTRL);
+ udelay(1);
+ while (addr_len--) {
+ writel(readl(efuse->base + REG_EFUSE_CTRL) | RK3399_STROBE |
+ ((addr_start++ & RK3399_A_MASK) << RK3399_A_SHIFT),
+ efuse->base + REG_EFUSE_CTRL);
+ udelay(1);
+ out_value = readl(efuse->base + REG_EFUSE_DOUT);
+ writel(readl(efuse->base + REG_EFUSE_CTRL) & (~RK3399_STROBE),
+ efuse->base + REG_EFUSE_CTRL);
+ udelay(1);
+
+ memcpy(&buf[i], &out_value, RK3399_NBYTES);
+ i += RK3399_NBYTES;
+ }
+
+ /* Switch to standby mode */
+ writel(RK3399_PD | RK3399_CSB, efuse->base + REG_EFUSE_CTRL);
+
+ memcpy(val, buf + addr_offset, bytes);
+
+ kfree(buf);
clk_disable_unprepare(efuse->clk);
@@ -89,7 +156,27 @@
};
static const struct of_device_id rockchip_efuse_match[] = {
- { .compatible = "rockchip,rockchip-efuse", },
+ /* deprecated but kept around for dts binding compatibility */
+ {
+ .compatible = "rockchip,rockchip-efuse",
+ .data = (void *)&rockchip_rk3288_efuse_read,
+ },
+ {
+ .compatible = "rockchip,rk3066a-efuse",
+ .data = (void *)&rockchip_rk3288_efuse_read,
+ },
+ {
+ .compatible = "rockchip,rk3188-efuse",
+ .data = (void *)&rockchip_rk3288_efuse_read,
+ },
+ {
+ .compatible = "rockchip,rk3288-efuse",
+ .data = (void *)&rockchip_rk3288_efuse_read,
+ },
+ {
+ .compatible = "rockchip,rk3399-efuse",
+ .data = (void *)&rockchip_rk3399_efuse_read,
+ },
{ /* sentinel */},
};
MODULE_DEVICE_TABLE(of, rockchip_efuse_match);
@@ -99,6 +186,14 @@
struct resource *res;
struct nvmem_device *nvmem;
struct rockchip_efuse_chip *efuse;
+ const struct of_device_id *match;
+ struct device *dev = &pdev->dev;
+
+ match = of_match_device(dev->driver->of_match_table, dev);
+ if (!match || !match->data) {
+ dev_err(dev, "failed to get match data\n");
+ return -EINVAL;
+ }
efuse = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_efuse_chip),
GFP_KERNEL);
@@ -116,7 +211,7 @@
efuse->dev = &pdev->dev;
econfig.size = resource_size(res);
- econfig.reg_read = rockchip_efuse_read;
+ econfig.reg_read = match->data;
econfig.priv = efuse;
econfig.dev = efuse->dev;
nvmem = nvmem_register(&econfig);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 085c638..c89d5d2 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -924,7 +924,7 @@
#ifdef CONFIG_SERIAL_EARLYCON
-static int __init early_init_dt_scan_chosen_serial(void)
+int __init early_init_dt_scan_chosen_stdout(void)
{
int offset;
const char *p, *q, *options = NULL;
@@ -968,15 +968,6 @@
}
return -ENODEV;
}
-
-static int __init setup_of_earlycon(char *buf)
-{
- if (buf)
- return 0;
-
- return early_init_dt_scan_chosen_serial();
-}
-early_param("earlycon", setup_of_earlycon);
#endif
/**
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
index ed5a097..f63d4b0d 100644
--- a/drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -16,6 +16,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define pr_fmt(fmt) "OF: NUMA: " fmt
+
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/nodemask.h>
@@ -49,10 +51,9 @@
if (r)
continue;
- pr_debug("NUMA: CPU on %u\n", nid);
+ pr_debug("CPU on %u\n", nid);
if (nid >= MAX_NUMNODES)
- pr_warn("NUMA: Node id %u exceeds maximum value\n",
- nid);
+ pr_warn("Node id %u exceeds maximum value\n", nid);
else
node_set(nid, numa_nodes_parsed);
}
@@ -63,13 +64,9 @@
struct device_node *np = NULL;
struct resource rsrc;
u32 nid;
- int r = 0;
+ int i, r;
- for (;;) {
- np = of_find_node_by_type(np, "memory");
- if (!np)
- break;
-
+ for_each_node_by_type(np, "memory") {
r = of_property_read_u32(np, "numa-node-id", &nid);
if (r == -EINVAL)
/*
@@ -78,27 +75,23 @@
* "numa-node-id" property
*/
continue;
- else if (r)
- /* some other error */
- break;
- r = of_address_to_resource(np, 0, &rsrc);
- if (r) {
- pr_err("NUMA: bad reg property in memory node\n");
- break;
+ if (nid >= MAX_NUMNODES) {
+ pr_warn("Node id %u exceeds maximum value\n", nid);
+ r = -EINVAL;
}
- pr_debug("NUMA: base = %llx len = %llx, node = %u\n",
- rsrc.start, rsrc.end - rsrc.start + 1, nid);
+ for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
+ r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
-
- r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
- if (r)
- break;
+ if (!i || r) {
+ of_node_put(np);
+ pr_err("bad property in memory node\n");
+ return r ? : -EINVAL;
+ }
}
- of_node_put(np);
- return r;
+ return 0;
}
static int __init of_numa_parse_distance_map_v1(struct device_node *map)
@@ -107,17 +100,17 @@
int entry_count;
int i;
- pr_info("NUMA: parsing numa-distance-map-v1\n");
+ pr_info("parsing numa-distance-map-v1\n");
matrix = of_get_property(map, "distance-matrix", NULL);
if (!matrix) {
- pr_err("NUMA: No distance-matrix property in distance-map\n");
+ pr_err("No distance-matrix property in distance-map\n");
return -EINVAL;
}
entry_count = of_property_count_u32_elems(map, "distance-matrix");
if (entry_count <= 0) {
- pr_err("NUMA: Invalid distance-matrix\n");
+ pr_err("Invalid distance-matrix\n");
return -EINVAL;
}
@@ -132,7 +125,7 @@
matrix++;
numa_set_distance(nodea, nodeb, distance);
- pr_debug("NUMA: distance[node%d -> node%d] = %d\n",
+ pr_debug("distance[node%d -> node%d] = %d\n",
nodea, nodeb, distance);
/* Set default distance of node B->A same as A->B */
@@ -166,8 +159,6 @@
np = of_node_get(device);
while (np) {
- struct device_node *parent;
-
r = of_property_read_u32(np, "numa-node-id", &nid);
/*
* -EINVAL indicates the property was not found, and
@@ -178,22 +169,15 @@
if (r != -EINVAL)
break;
- parent = of_get_parent(np);
- of_node_put(np);
- np = parent;
+ np = of_get_next_parent(np);
}
if (np && r)
- pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+ pr_warn("Invalid \"numa-node-id\" property in node %s\n",
np->name);
of_node_put(np);
- if (!r) {
- if (nid >= MAX_NUMNODES)
- pr_warn("NUMA: Node id %u exceeds maximum value\n",
- nid);
- else
- return nid;
- }
+ if (!r)
+ return nid;
return NUMA_NO_NODE;
}
diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
index bdef916..2498a6c 100644
--- a/drivers/oprofile/timer_int.c
+++ b/drivers/oprofile/timer_int.c
@@ -74,37 +74,39 @@
put_online_cpus();
}
-static int oprofile_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int oprofile_timer_online(unsigned int cpu)
{
- long cpu = (long) hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- smp_call_function_single(cpu, __oprofile_hrtimer_start,
- NULL, 1);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- __oprofile_hrtimer_stop(cpu);
- break;
- }
- return NOTIFY_OK;
+ local_irq_disable();
+ __oprofile_hrtimer_start(NULL);
+ local_irq_enable();
+ return 0;
}
-static struct notifier_block __refdata oprofile_cpu_notifier = {
- .notifier_call = oprofile_cpu_notify,
-};
+static int oprofile_timer_prep_down(unsigned int cpu)
+{
+ __oprofile_hrtimer_stop(cpu);
+ return 0;
+}
+
+static enum cpuhp_state hp_online;
static int oprofile_hrtimer_setup(void)
{
- return register_hotcpu_notifier(&oprofile_cpu_notifier);
+ int ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "oprofile/timer:online",
+ oprofile_timer_online,
+ oprofile_timer_prep_down);
+ if (ret < 0)
+ return ret;
+ hp_online = ret;
+ return 0;
}
static void oprofile_hrtimer_shutdown(void)
{
- unregister_hotcpu_notifier(&oprofile_cpu_notifier);
+ cpuhp_remove_state_nocalls(hp_online);
}
int oprofile_timer_init(struct oprofile_operations *ops)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 98f1222..bfdd074 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -19,6 +19,7 @@
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
+#include <linux/acpi_iort.h>
#include <linux/slab.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
@@ -549,15 +550,23 @@
return ret;
}
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+static struct msi_desc *
+msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
{
- u16 control;
+ struct cpumask *masks = NULL;
struct msi_desc *entry;
+ u16 control;
+
+ if (affinity) {
+ masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+ if (!masks)
+ pr_err("Unable to allocate affinity masks, ignoring\n");
+ }
/* MSI Entry Initialization */
- entry = alloc_msi_entry(&dev->dev);
+ entry = alloc_msi_entry(&dev->dev, nvec, masks);
if (!entry)
- return NULL;
+ goto out;
pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
@@ -568,8 +577,6 @@
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
- entry->nvec_used = nvec;
- entry->affinity = dev->irq_affinity;
if (control & PCI_MSI_FLAGS_64BIT)
entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -580,6 +587,8 @@
if (entry->msi_attrib.maskbit)
pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
+out:
+ kfree(masks);
return entry;
}
@@ -608,7 +617,7 @@
* an error, and a positive return value indicates the number of interrupts
* which could have been allocated.
*/
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
{
struct msi_desc *entry;
int ret;
@@ -616,7 +625,7 @@
pci_msi_set_enable(dev, 0); /* Disable MSI during set up */
- entry = msi_setup_entry(dev, nvec);
+ entry = msi_setup_entry(dev, nvec, affinity);
if (!entry)
return -ENOMEM;
@@ -679,28 +688,29 @@
}
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
- struct msix_entry *entries, int nvec)
+ struct msix_entry *entries, int nvec,
+ bool affinity)
{
- const struct cpumask *mask = NULL;
+ struct cpumask *curmsk, *masks = NULL;
struct msi_desc *entry;
- int cpu = -1, i;
+ int ret, i;
- for (i = 0; i < nvec; i++) {
- if (dev->irq_affinity) {
- cpu = cpumask_next(cpu, dev->irq_affinity);
- if (cpu >= nr_cpu_ids)
- cpu = cpumask_first(dev->irq_affinity);
- mask = cpumask_of(cpu);
- }
+ if (affinity) {
+ masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+ if (!masks)
+ pr_err("Unable to allocate affinity masks, ignoring\n");
+ }
- entry = alloc_msi_entry(&dev->dev);
+ for (i = 0, curmsk = masks; i < nvec; i++) {
+ entry = alloc_msi_entry(&dev->dev, 1, curmsk);
if (!entry) {
if (!i)
iounmap(base);
else
free_msi_irqs(dev);
/* No enough memory. Don't try again */
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
entry->msi_attrib.is_msix = 1;
@@ -711,12 +721,14 @@
entry->msi_attrib.entry_nr = i;
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
- entry->nvec_used = 1;
- entry->affinity = mask;
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+ if (masks)
+ curmsk++;
}
-
+ ret = 0;
+out:
+ kfree(masks);
return 0;
}
@@ -745,8 +757,8 @@
* single MSI-X irq. A return of zero indicates the successful setup of
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
-static int msix_capability_init(struct pci_dev *dev,
- struct msix_entry *entries, int nvec)
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+ int nvec, bool affinity)
{
int ret;
u16 control;
@@ -761,7 +773,7 @@
if (!base)
return -ENOMEM;
- ret = msix_setup_entries(dev, base, entries, nvec);
+ ret = msix_setup_entries(dev, base, entries, nvec, affinity);
if (ret)
return ret;
@@ -941,22 +953,8 @@
}
EXPORT_SYMBOL(pci_msix_vec_count);
-/**
- * pci_enable_msix - configure device's MSI-X capability structure
- * @dev: pointer to the pci_dev data structure of MSI-X device function
- * @entries: pointer to an array of MSI-X entries (optional)
- * @nvec: number of MSI-X irqs requested for allocation by device driver
- *
- * Setup the MSI-X capability structure of device function with the number
- * of requested irqs upon its software driver call to request for
- * MSI-X mode enabled on its hardware device function. A return of zero
- * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
- **/
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+ int nvec, bool affinity)
{
int nr_entries;
int i, j;
@@ -988,7 +986,27 @@
dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
return -EINVAL;
}
- return msix_capability_init(dev, entries, nvec);
+ return msix_capability_init(dev, entries, nvec, affinity);
+}
+
+/**
+ * pci_enable_msix - configure device's MSI-X capability structure
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of MSI-X entries (optional)
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
+ *
+ * Setup the MSI-X capability structure of device function with the number
+ * of requested irqs upon its software driver call to request for
+ * MSI-X mode enabled on its hardware device function. A return of zero
+ * indicates the successful configuration of MSI-X capability structure
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+ * Or a return of > 0 indicates that driver request is exceeding the number
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
+ **/
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+ return __pci_enable_msix(dev, entries, nvec, false);
}
EXPORT_SYMBOL(pci_enable_msix);
@@ -1041,6 +1059,7 @@
static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
unsigned int flags)
{
+ bool affinity = flags & PCI_IRQ_AFFINITY;
int nvec;
int rc;
@@ -1069,19 +1088,17 @@
nvec = maxvec;
for (;;) {
- if (flags & PCI_IRQ_AFFINITY) {
- dev->irq_affinity = irq_create_affinity_mask(&nvec);
+ if (affinity) {
+ nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+ nvec);
if (nvec < minvec)
return -ENOSPC;
}
- rc = msi_capability_init(dev, nvec);
+ rc = msi_capability_init(dev, nvec, affinity);
if (rc == 0)
return nvec;
- kfree(dev->irq_affinity);
- dev->irq_affinity = NULL;
-
if (rc < 0)
return rc;
if (rc < minvec)
@@ -1113,26 +1130,24 @@
struct msix_entry *entries, int minvec, int maxvec,
unsigned int flags)
{
- int nvec = maxvec;
- int rc;
+ bool affinity = flags & PCI_IRQ_AFFINITY;
+ int rc, nvec = maxvec;
if (maxvec < minvec)
return -ERANGE;
for (;;) {
- if (flags & PCI_IRQ_AFFINITY) {
- dev->irq_affinity = irq_create_affinity_mask(&nvec);
+ if (affinity) {
+ nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+ nvec);
if (nvec < minvec)
return -ENOSPC;
}
- rc = pci_enable_msix(dev, entries, nvec);
+ rc = __pci_enable_msix(dev, entries, nvec, affinity);
if (rc == 0)
return nvec;
- kfree(dev->irq_affinity);
- dev->irq_affinity = NULL;
-
if (rc < 0)
return rc;
if (rc < minvec)
@@ -1256,6 +1271,37 @@
}
EXPORT_SYMBOL(pci_irq_vector);
+/**
+ * pci_irq_get_affinity - return the affinity of a particular msi vector
+ * @dev: PCI device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
+ */
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+{
+ if (dev->msix_enabled) {
+ struct msi_desc *entry;
+ int i = 0;
+
+ for_each_pci_msi_entry(entry, dev) {
+ if (i == nr)
+ return entry->affinity;
+ i++;
+ }
+ WARN_ON_ONCE(1);
+ return NULL;
+ } else if (dev->msi_enabled) {
+ struct msi_desc *entry = first_pci_msi_entry(dev);
+
+ if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used))
+ return NULL;
+
+ return &entry->affinity[nr];
+ } else {
+ return cpu_possible_mask;
+ }
+}
+EXPORT_SYMBOL(pci_irq_get_affinity);
+
struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
return to_pci_dev(desc->dev);
@@ -1502,8 +1548,8 @@
pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
of_node = irq_domain_get_of_node(domain);
- if (of_node)
- rid = of_msi_map_rid(&pdev->dev, of_node, rid);
+ rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) :
+ iort_msi_map_rid(&pdev->dev, rid);
return rid;
}
@@ -1519,9 +1565,13 @@
*/
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
{
+ struct irq_domain *dom;
u32 rid = 0;
pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
- return of_msi_map_get_device_domain(&pdev->dev, rid);
+ dom = of_msi_map_get_device_domain(&pdev->dev, rid);
+ if (!dom)
+ dom = iort_get_device_domain(&pdev->dev, rid);
+ return dom;
}
#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index c878aa7..55f453d 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -60,8 +60,13 @@
#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
+/*
+ * This table should be in sync with the one in
+ * arch/x86/platform/intel-mid/pwr.c.
+ */
static const struct x86_cpu_id lpss_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD1),
+ ICPU(INTEL_FAM6_ATOM_PENWELL),
+ ICPU(INTEL_FAM6_ATOM_MERRIFIELD),
{}
};
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aab9d51..415956c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -480,6 +480,30 @@
EXPORT_SYMBOL(pci_find_parent_resource);
/**
+ * pci_find_resource - Return matching PCI device resource
+ * @dev: PCI device to query
+ * @res: Resource to look for
+ *
+ * Goes over standard PCI resources (BARs) and checks if the given resource
+ * is partially or fully contained in any of them. In that case the
+ * matching resource is returned, %NULL otherwise.
+ */
+struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->start && resource_contains(r, res))
+ return r;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(pci_find_resource);
+
+/**
* pci_find_pcie_root_port - return PCIe Root Port
* @dev: PCI device to query
*
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index c74059e..f30ca75 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,6 +25,7 @@
#include <linux/ioport.h>
#include <linux/cache.h>
#include <linux/slab.h>
+#include <linux/acpi.h>
#include "pci.h"
unsigned int pci_flags;
@@ -1852,8 +1853,13 @@
{
struct pci_bus *root_bus;
- list_for_each_entry(root_bus, &pci_root_buses, node)
+ list_for_each_entry(root_bus, &pci_root_buses, node) {
pci_assign_unassigned_root_bus_resources(root_bus);
+
+ /* Make sure the root bridge has a companion ACPI device: */
+ if (ACPI_HANDLE(root_bus->bridge))
+ acpi_ioapic_add(ACPI_HANDLE(root_bus->bridge));
+ }
}
void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index f5e1008..b37b572 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -534,6 +534,24 @@
return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}
+static ssize_t armpmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
+ return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
+}
+
+static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
+
+static struct attribute *armpmu_common_attrs[] = {
+ &dev_attr_cpus.attr,
+ NULL,
+};
+
+static struct attribute_group armpmu_common_attr_group = {
+ .attrs = armpmu_common_attrs,
+};
+
static void armpmu_init(struct arm_pmu *armpmu)
{
atomic_set(&armpmu->active_events, 0);
@@ -549,7 +567,10 @@
.stop = armpmu_stop,
.read = armpmu_read,
.filter_match = armpmu_filter_match,
+ .attr_groups = armpmu->attr_groups,
};
+ armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
+ &armpmu_common_attr_group;
}
/* Set at runtime when we know what CPU type we are. */
@@ -602,7 +623,7 @@
irqs = min(pmu_device->num_resources, num_possible_cpus());
irq = platform_get_irq(pmu_device, 0);
- if (irq >= 0 && irq_is_percpu(irq)) {
+ if (irq > 0 && irq_is_percpu(irq)) {
on_each_cpu_mask(&cpu_pmu->supported_cpus,
cpu_pmu_disable_percpu_irq, &irq, 1);
free_percpu_irq(irq, &hw_events->percpu_pmu);
@@ -616,7 +637,7 @@
if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
+ if (irq > 0)
free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
}
}
@@ -638,7 +659,7 @@
}
irq = platform_get_irq(pmu_device, 0);
- if (irq >= 0 && irq_is_percpu(irq)) {
+ if (irq > 0 && irq_is_percpu(irq)) {
err = request_percpu_irq(irq, handler, "arm-pmu",
&hw_events->percpu_pmu);
if (err) {
@@ -688,28 +709,20 @@
return 0;
}
-static DEFINE_SPINLOCK(arm_pmu_lock);
-static LIST_HEAD(arm_pmu_list);
-
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
* UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
* junk values out of them.
*/
-static int arm_perf_starting_cpu(unsigned int cpu)
+static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct arm_pmu *pmu;
+ struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
- spin_lock(&arm_pmu_lock);
- list_for_each_entry(pmu, &arm_pmu_list, entry) {
-
- if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
- continue;
- if (pmu->reset)
- pmu->reset(pmu);
- }
- spin_unlock(&arm_pmu_lock);
+ if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+ return 0;
+ if (pmu->reset)
+ pmu->reset(pmu);
return 0;
}
@@ -821,9 +834,10 @@
if (!cpu_hw_events)
return -ENOMEM;
- spin_lock(&arm_pmu_lock);
- list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
- spin_unlock(&arm_pmu_lock);
+ err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+ &cpu_pmu->node);
+ if (err)
+ goto out_free;
err = cpu_pm_pmu_register(cpu_pmu);
if (err)
@@ -859,9 +873,9 @@
return 0;
out_unregister:
- spin_lock(&arm_pmu_lock);
- list_del(&cpu_pmu->entry);
- spin_unlock(&arm_pmu_lock);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+ &cpu_pmu->node);
+out_free:
free_percpu(cpu_hw_events);
return err;
}
@@ -869,9 +883,8 @@
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
cpu_pm_pmu_unregister(cpu_pmu);
- spin_lock(&arm_pmu_lock);
- list_del(&cpu_pmu->entry);
- spin_unlock(&arm_pmu_lock);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+ &cpu_pmu->node);
free_percpu(cpu_pmu->hw_events);
}
@@ -919,7 +932,7 @@
/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
irq = platform_get_irq(pdev, i);
- if (irq >= 0) {
+ if (irq > 0) {
bool spi = !irq_is_percpu(irq);
if (i > 0 && spi != using_spi) {
@@ -970,7 +983,7 @@
if (cpumask_weight(&pmu->supported_cpus) == 0) {
int irq = platform_get_irq(pdev, 0);
- if (irq >= 0 && irq_is_percpu(irq)) {
+ if (irq > 0 && irq_is_percpu(irq)) {
/* If using PPIs, check the affinity of the partition */
int ret;
@@ -1029,7 +1042,7 @@
ret = of_pmu_irq_cfg(pmu);
if (!ret)
ret = init_fn(pmu);
- } else {
+ } else if (probe_table) {
cpumask_setall(&pmu->supported_cpus);
ret = probe_current_pmu(pmu, probe_table);
}
@@ -1039,6 +1052,7 @@
goto out_free;
}
+
ret = cpu_pmu_init(pmu);
if (ret)
goto out_free;
@@ -1069,9 +1083,9 @@
{
int ret;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
- "AP_PERF_ARM_STARTING",
- arm_perf_starting_cpu, NULL);
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+ "AP_PERF_ARM_STARTING",
+ arm_perf_starting_cpu, NULL);
if (ret)
pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
ret);
diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
index b86e1bc..a511d51 100644
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ b/drivers/platform/x86/intel_pmc_ipc.c
@@ -651,11 +651,15 @@
{
int ret;
- ret = ipc_create_tco_device();
- if (ret) {
- dev_err(ipcdev.dev, "Failed to add tco platform device\n");
- return ret;
+ /* If we have ACPI based watchdog use that instead */
+ if (!acpi_has_watchdog()) {
+ ret = ipc_create_tco_device();
+ if (ret) {
+ dev_err(ipcdev.dev, "Failed to add tco platform device\n");
+ return ret;
+ }
}
+
ret = ipc_create_punit_device();
if (ret) {
dev_err(ipcdev.dev, "Failed to add punit platform device\n");
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c
index cf88f9b6..d8bf5a1 100644
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -34,7 +34,7 @@
* 2003-08-11 Resource Management Updates - Adam Belay <ambx1@neo.rr.com>
*/
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>
@@ -54,8 +54,6 @@
static int isapnp_reset = 1; /* reset all PnP cards (deactivate) */
static int isapnp_verbose = 1; /* verbose mode */
-MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
-MODULE_DESCRIPTION("Generic ISA Plug & Play support");
module_param(isapnp_disable, int, 0);
MODULE_PARM_DESC(isapnp_disable, "ISA Plug & Play disable");
module_param(isapnp_rdp, int, 0);
@@ -64,7 +62,6 @@
MODULE_PARM_DESC(isapnp_reset, "ISA Plug & Play reset all cards");
module_param(isapnp_verbose, int, 0);
MODULE_PARM_DESC(isapnp_verbose, "ISA Plug & Play verbose mode");
-MODULE_LICENSE("GPL");
#define _PIDXR 0x279
#define _PNPWRP 0xa79
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index db9973b..fa0f19b 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -136,7 +136,7 @@
if (IS_ERR(fck)) {
dev_err(&sr->pdev->dev, "%s: unable to get fck for device %s\n",
- __func__, dev_name(&sr->pdev->dev));
+ __func__, dev_name(&sr->pdev->dev));
return;
}
@@ -170,8 +170,8 @@
{
if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) {
dev_warn(&sr->pdev->dev,
- "%s: smartreflex class driver not registered\n",
- __func__);
+ "%s: smartreflex class driver not registered\n",
+ __func__);
return;
}
@@ -183,8 +183,8 @@
{
if (!sr_class || !(sr_class->disable)) {
dev_warn(&sr->pdev->dev,
- "%s: smartreflex class driver not registered\n",
- __func__);
+ "%s: smartreflex class driver not registered\n",
+ __func__);
return;
}
@@ -225,9 +225,8 @@
error:
list_del(&sr_info->node);
- dev_err(&sr_info->pdev->dev, "%s: ERROR in registering"
- "interrupt handler. Smartreflex will"
- "not function as desired\n", __func__);
+ dev_err(&sr_info->pdev->dev, "%s: ERROR in registering interrupt handler. Smartreflex will not function as desired\n",
+ __func__);
return ret;
}
@@ -263,7 +262,7 @@
if (timeout >= SR_DISABLE_TIMEOUT)
dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n",
- __func__);
+ __func__);
/* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */
sr_modify_reg(sr, ERRCONFIG_V1, ERRCONFIG_MCUDISACKINTEN,
@@ -308,7 +307,7 @@
if (timeout >= SR_DISABLE_TIMEOUT)
dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n",
- __func__);
+ __func__);
/* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */
sr_write_reg(sr, IRQENABLE_CLR, IRQENABLE_MCUDISABLEACKINT);
@@ -322,7 +321,7 @@
if (!sr->nvalue_table) {
dev_warn(&sr->pdev->dev, "%s: Missing ntarget value table\n",
- __func__);
+ __func__);
return NULL;
}
@@ -356,8 +355,8 @@
u8 senp_shift, senn_shift;
if (!sr) {
- pr_warn("%s: NULL omap_sr from %pF\n", __func__,
- (void *)_RET_IP_);
+ pr_warn("%s: NULL omap_sr from %pF\n",
+ __func__, (void *)_RET_IP_);
return -EINVAL;
}
@@ -387,8 +386,8 @@
vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2;
break;
default:
- dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
- "module without specifying the ip\n", __func__);
+ dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+ __func__);
return -EINVAL;
}
@@ -423,8 +422,8 @@
u32 vpboundint_en, vpboundint_st;
if (!sr) {
- pr_warn("%s: NULL omap_sr from %pF\n", __func__,
- (void *)_RET_IP_);
+ pr_warn("%s: NULL omap_sr from %pF\n",
+ __func__, (void *)_RET_IP_);
return -EINVAL;
}
@@ -440,8 +439,8 @@
vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2;
break;
default:
- dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
- "module without specifying the ip\n", __func__);
+ dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+ __func__);
return -EINVAL;
}
@@ -478,8 +477,8 @@
u8 senp_shift, senn_shift;
if (!sr) {
- pr_warn("%s: NULL omap_sr from %pF\n", __func__,
- (void *)_RET_IP_);
+ pr_warn("%s: NULL omap_sr from %pF\n",
+ __func__, (void *)_RET_IP_);
return -EINVAL;
}
@@ -504,8 +503,8 @@
senp_shift = SRCONFIG_SENPENABLE_V2_SHIFT;
break;
default:
- dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
- "module without specifying the ip\n", __func__);
+ dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+ __func__);
return -EINVAL;
}
@@ -537,8 +536,8 @@
IRQENABLE_MCUBOUNDSINT | IRQENABLE_MCUDISABLEACKINT);
break;
default:
- dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
- "module without specifying the ip\n", __func__);
+ dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+ __func__);
return -EINVAL;
}
@@ -563,16 +562,16 @@
int ret;
if (!sr) {
- pr_warn("%s: NULL omap_sr from %pF\n", __func__,
- (void *)_RET_IP_);
+ pr_warn("%s: NULL omap_sr from %pF\n",
+ __func__, (void *)_RET_IP_);
return -EINVAL;
}
volt_data = omap_voltage_get_voltdata(sr->voltdm, volt);
if (IS_ERR(volt_data)) {
- dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table"
- "for nominal voltage %ld\n", __func__, volt);
+ dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table for nominal voltage %ld\n",
+ __func__, volt);
return PTR_ERR(volt_data);
}
@@ -615,8 +614,8 @@
void sr_disable(struct omap_sr *sr)
{
if (!sr) {
- pr_warn("%s: NULL omap_sr from %pF\n", __func__,
- (void *)_RET_IP_);
+ pr_warn("%s: NULL omap_sr from %pF\n",
+ __func__, (void *)_RET_IP_);
return;
}
@@ -658,13 +657,13 @@
struct omap_sr *sr_info;
if (!class_data) {
- pr_warning("%s:, Smartreflex class data passed is NULL\n",
+ pr_warn("%s:, Smartreflex class data passed is NULL\n",
__func__);
return -EINVAL;
}
if (sr_class) {
- pr_warning("%s: Smartreflex class driver already registered\n",
+ pr_warn("%s: Smartreflex class driver already registered\n",
__func__);
return -EBUSY;
}
@@ -696,7 +695,7 @@
struct omap_sr *sr = _sr_lookup(voltdm);
if (IS_ERR(sr)) {
- pr_warning("%s: omap_sr struct for voltdm not found\n", __func__);
+ pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
return;
}
@@ -704,8 +703,8 @@
return;
if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) {
- dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
- "registered\n", __func__);
+ dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+ __func__);
return;
}
@@ -728,7 +727,7 @@
struct omap_sr *sr = _sr_lookup(voltdm);
if (IS_ERR(sr)) {
- pr_warning("%s: omap_sr struct for voltdm not found\n", __func__);
+ pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
return;
}
@@ -736,8 +735,8 @@
return;
if (!sr_class || !(sr_class->disable)) {
- dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
- "registered\n", __func__);
+ dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+ __func__);
return;
}
@@ -760,7 +759,7 @@
struct omap_sr *sr = _sr_lookup(voltdm);
if (IS_ERR(sr)) {
- pr_warning("%s: omap_sr struct for voltdm not found\n", __func__);
+ pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
return;
}
@@ -768,8 +767,8 @@
return;
if (!sr_class || !(sr_class->disable)) {
- dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
- "registered\n", __func__);
+ dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+ __func__);
return;
}
@@ -787,8 +786,8 @@
void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data)
{
if (!pmic_data) {
- pr_warning("%s: Trying to register NULL PMIC data structure"
- "with smartreflex\n", __func__);
+ pr_warn("%s: Trying to register NULL PMIC data structure with smartreflex\n",
+ __func__);
return;
}
@@ -801,7 +800,7 @@
struct omap_sr *sr_info = data;
if (!sr_info) {
- pr_warning("%s: omap_sr struct not found\n", __func__);
+ pr_warn("%s: omap_sr struct not found\n", __func__);
return -EINVAL;
}
@@ -815,13 +814,13 @@
struct omap_sr *sr_info = data;
if (!sr_info) {
- pr_warning("%s: omap_sr struct not found\n", __func__);
+ pr_warn("%s: omap_sr struct not found\n", __func__);
return -EINVAL;
}
/* Sanity check */
if (val > 1) {
- pr_warning("%s: Invalid argument %lld\n", __func__, val);
+ pr_warn("%s: Invalid argument %lld\n", __func__, val);
return -EINVAL;
}
@@ -848,19 +847,13 @@
int i, ret = 0;
sr_info = devm_kzalloc(&pdev->dev, sizeof(struct omap_sr), GFP_KERNEL);
- if (!sr_info) {
- dev_err(&pdev->dev, "%s: unable to allocate sr_info\n",
- __func__);
+ if (!sr_info)
return -ENOMEM;
- }
sr_info->name = devm_kzalloc(&pdev->dev,
SMARTREFLEX_NAME_LEN, GFP_KERNEL);
- if (!sr_info->name) {
- dev_err(&pdev->dev, "%s: unable to allocate SR instance name\n",
- __func__);
+ if (!sr_info->name)
return -ENOMEM;
- }
platform_set_drvdata(pdev, sr_info);
@@ -912,7 +905,7 @@
if (sr_class) {
ret = sr_late_init(sr_info);
if (ret) {
- pr_warning("%s: Error in SR late init\n", __func__);
+ pr_warn("%s: Error in SR late init\n", __func__);
goto err_list_del;
}
}
@@ -923,7 +916,7 @@
if (IS_ERR_OR_NULL(sr_dbg_dir)) {
ret = PTR_ERR(sr_dbg_dir);
pr_err("%s:sr debugfs dir creation failed(%d)\n",
- __func__, ret);
+ __func__, ret);
goto err_list_del;
}
}
@@ -945,8 +938,8 @@
nvalue_dir = debugfs_create_dir("nvalue", sr_info->dbg_dir);
if (IS_ERR_OR_NULL(nvalue_dir)) {
- dev_err(&pdev->dev, "%s: Unable to create debugfs directory"
- "for n-values\n", __func__);
+ dev_err(&pdev->dev, "%s: Unable to create debugfs directory for n-values\n",
+ __func__);
ret = PTR_ERR(nvalue_dir);
goto err_debugfs;
}
@@ -1053,12 +1046,12 @@
if (sr_pmic_data && sr_pmic_data->sr_pmic_init)
sr_pmic_data->sr_pmic_init();
else
- pr_warning("%s: No PMIC hook to init smartreflex\n", __func__);
+ pr_warn("%s: No PMIC hook to init smartreflex\n", __func__);
ret = platform_driver_probe(&smartreflex_driver, omap_sr_probe);
if (ret) {
pr_err("%s: platform driver register failed for SR\n",
- __func__);
+ __func__);
return ret;
}
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index fbab29d..243b233 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1154,8 +1154,8 @@
RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt),
RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht),
- RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD1, rapl_defaults_tng),
- RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD2, rapl_defaults_ann),
+ RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng),
+ RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann),
RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core),
RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core),
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 3fa17ac..cebc296 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2247,17 +2247,30 @@
{
struct rio_channel *ch;
unsigned int i;
+ LIST_HEAD(list);
riocm_debug(EXIT, ".");
+ /*
+ * If there are any channels left in connected state send
+ * close notification to the connection partner.
+ * First build a list of channels that require a closing
+ * notification because function riocm_send_close() should
+ * be called outside of spinlock protected code.
+ */
spin_lock_bh(&idr_lock);
idr_for_each_entry(&ch_idr, ch, i) {
- riocm_debug(EXIT, "close ch %d", ch->id);
- if (ch->state == RIO_CM_CONNECTED)
- riocm_send_close(ch);
+ if (ch->state == RIO_CM_CONNECTED) {
+ riocm_debug(EXIT, "close ch %d", ch->id);
+ idr_remove(&ch_idr, ch->id);
+ list_add(&ch->ch_node, &list);
+ }
}
spin_unlock_bh(&idr_lock);
+ list_for_each_entry(ch, &list, ch_node)
+ riocm_send_close(ch);
+
return NOTIFY_DONE;
}
diff --git a/drivers/s390/char/sclp_ctl.c b/drivers/s390/char/sclp_ctl.c
index ea607a4..554eaa1 100644
--- a/drivers/s390/char/sclp_ctl.c
+++ b/drivers/s390/char/sclp_ctl.c
@@ -126,21 +126,4 @@
.name = "sclp",
.fops = &sclp_ctl_fops,
};
-
-/*
- * Register sclp_ctl misc device
- */
-static int __init sclp_ctl_init(void)
-{
- return misc_register(&sclp_ctl_device);
-}
-module_init(sclp_ctl_init);
-
-/*
- * Deregister sclp_ctl misc device
- */
-static void __exit sclp_ctl_exit(void)
-{
- misc_deregister(&sclp_ctl_device);
-}
-module_exit(sclp_ctl_exit);
+module_misc_device(sclp_ctl_device);
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index bf40063..6d4b68c4 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -999,6 +999,7 @@
__u16, __u16,
enum qeth_prot_versions);
int qeth_set_features(struct net_device *, netdev_features_t);
+int qeth_recover_features(struct net_device *);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
/* exports for OSN */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 7dba6c8..20cf296 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3619,7 +3619,8 @@
int e;
e = 0;
- while (buffer->element[e].addr) {
+ while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
+ buffer->element[e].addr) {
unsigned long phys_aob_addr;
phys_aob_addr = (unsigned long) buffer->element[e].addr;
@@ -6131,6 +6132,35 @@
return rc;
}
+/* try to restore device features on a device after recovery */
+int qeth_recover_features(struct net_device *dev)
+{
+ struct qeth_card *card = dev->ml_priv;
+ netdev_features_t recover = dev->features;
+
+ if (recover & NETIF_F_IP_CSUM) {
+ if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
+ recover ^= NETIF_F_IP_CSUM;
+ }
+ if (recover & NETIF_F_RXCSUM) {
+ if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
+ recover ^= NETIF_F_RXCSUM;
+ }
+ if (recover & NETIF_F_TSO) {
+ if (qeth_set_ipa_tso(card, 1))
+ recover ^= NETIF_F_TSO;
+ }
+
+ if (recover == dev->features)
+ return 0;
+
+ dev_warn(&card->gdev->dev,
+ "Device recovery failed to restore all offload features\n");
+ dev->features = recover;
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(qeth_recover_features);
+
int qeth_set_features(struct net_device *dev, netdev_features_t features)
{
struct qeth_card *card = dev->ml_priv;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7bc20c5..bb27058 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1124,14 +1124,11 @@
card->dev->hw_features |= NETIF_F_RXCSUM;
card->dev->vlan_features |= NETIF_F_RXCSUM;
}
- /* Turn on SG per default */
- card->dev->features |= NETIF_F_SG;
}
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
PAGE_SIZE;
- card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
SET_NETDEV_DEV(card->dev, &card->gdev->dev);
netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT);
netif_carrier_off(card->dev);
@@ -1246,6 +1243,9 @@
}
/* this also sets saved unicast addresses */
qeth_l2_set_rx_mode(card->dev);
+ rtnl_lock();
+ qeth_recover_features(card->dev);
+ rtnl_unlock();
}
/* let user_space know that device is online */
kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 7293466..272d9e7 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -257,6 +257,11 @@
if (addr->in_progress)
return -EINPROGRESS;
+ if (!qeth_card_hw_is_reachable(card)) {
+ addr->disp_flag = QETH_DISP_ADDR_DELETE;
+ return 0;
+ }
+
rc = qeth_l3_deregister_addr_entry(card, addr);
hash_del(&addr->hnode);
@@ -296,6 +301,11 @@
hash_add(card->ip_htable, &addr->hnode,
qeth_l3_ipaddr_hash(addr));
+ if (!qeth_card_hw_is_reachable(card)) {
+ addr->disp_flag = QETH_DISP_ADDR_ADD;
+ return 0;
+ }
+
/* qeth_l3_register_addr_entry can go to sleep
* if we add a IPV4 addr. It is caused by the reason
* that SETIP ipa cmd starts ARP staff for IPV4 addr.
@@ -390,12 +400,16 @@
int i;
int rc;
- QETH_CARD_TEXT(card, 4, "recoverip");
+ QETH_CARD_TEXT(card, 4, "recovrip");
spin_lock_bh(&card->ip_lock);
hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
- if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+ if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
+ qeth_l3_deregister_addr_entry(card, addr);
+ hash_del(&addr->hnode);
+ kfree(addr);
+ } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
if (addr->proto == QETH_PROT_IPV4) {
addr->in_progress = 1;
spin_unlock_bh(&card->ip_lock);
@@ -407,10 +421,8 @@
if (!rc) {
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- if (addr->ref_counter < 1) {
+ if (addr->ref_counter < 1)
qeth_l3_delete_ip(card, addr);
- kfree(addr);
- }
} else {
hash_del(&addr->hnode);
kfree(addr);
@@ -689,7 +701,7 @@
spin_lock_bh(&card->ip_lock);
- if (!qeth_l3_ip_from_hash(card, ipaddr))
+ if (qeth_l3_ip_from_hash(card, ipaddr))
rc = -EEXIST;
else
qeth_l3_add_ip(card, ipaddr);
@@ -757,7 +769,7 @@
spin_lock_bh(&card->ip_lock);
- if (!qeth_l3_ip_from_hash(card, ipaddr))
+ if (qeth_l3_ip_from_hash(card, ipaddr))
rc = -EEXIST;
else
qeth_l3_add_ip(card, ipaddr);
@@ -3108,7 +3120,6 @@
card->dev->vlan_features = NETIF_F_SG |
NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
NETIF_F_TSO;
- card->dev->features = NETIF_F_SG;
}
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -3136,7 +3147,6 @@
netif_keep_dst(card->dev);
card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
PAGE_SIZE;
- card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
SET_NETDEV_DEV(card->dev, &card->gdev->dev);
netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT);
@@ -3269,6 +3279,7 @@
else
dev_open(card->dev);
qeth_l3_set_multicast_list(card->dev);
+ qeth_recover_features(card->dev);
rtnl_unlock();
}
qeth_trace_features(card);
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 65645b1..0e00a5c 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -297,7 +297,9 @@
addr->u.a6.pfxlen = 0;
addr->type = QETH_IP_TYPE_NORMAL;
+ spin_lock_bh(&card->ip_lock);
qeth_l3_delete_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
kfree(addr);
}
@@ -329,7 +331,10 @@
addr->type = QETH_IP_TYPE_NORMAL;
} else
return -ENOMEM;
+
+ spin_lock_bh(&card->ip_lock);
qeth_l3_add_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
kfree(addr);
return count;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ba9af4a..ec6381e 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -486,6 +486,8 @@
else
shost->dma_boundary = 0xffffffff;
+ shost->use_blk_mq = scsi_use_blk_mq;
+
device_initialize(&shost->shost_gendev);
dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1f36aca..1deb6ad 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1160,7 +1160,6 @@
bool scsi_use_blk_mq = false;
#endif
module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);
-EXPORT_SYMBOL_GPL(scsi_use_blk_mq);
static int __init init_scsi(void)
{
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 57a4b99..85c8a51 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -29,6 +29,7 @@
extern void scsi_exit_hosts(void);
/* scsi.c */
+extern bool scsi_use_blk_mq;
extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
#ifdef CONFIG_SCSI_LOGGING
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 7dbbb29..deefab3 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -107,8 +107,8 @@
/* If the affinity hint is set for virtqueues */
bool affinity_hint_set;
- /* CPU hotplug notifier */
- struct notifier_block nb;
+ struct hlist_node node;
+ struct hlist_node node_dead;
/* Protected by event_vq lock */
bool stop_events;
@@ -118,6 +118,7 @@
struct virtio_scsi_vq req_vqs[];
};
+static enum cpuhp_state virtioscsi_online;
static struct kmem_cache *virtscsi_cmd_cache;
static mempool_t *virtscsi_cmd_pool;
@@ -852,21 +853,33 @@
put_online_cpus();
}
-static int virtscsi_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb);
- switch(action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- __virtscsi_set_affinity(vscsi, true);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi,
+ node);
+ __virtscsi_set_affinity(vscsi, true);
+ return 0;
+}
+
+static int virtscsi_cpu_notif_add(struct virtio_scsi *vi)
+{
+ int ret;
+
+ ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead);
+ if (ret)
+ cpuhp_state_remove_instance(virtioscsi_online, &vi->node);
+ return ret;
+}
+
+static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi)
+{
+ cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD,
+ &vi->node_dead);
}
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
@@ -929,8 +942,6 @@
virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
vqs[i]);
- virtscsi_set_affinity(vscsi, true);
-
virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
@@ -987,12 +998,9 @@
if (err)
goto virtscsi_init_failed;
- vscsi->nb.notifier_call = &virtscsi_cpu_callback;
- err = register_hotcpu_notifier(&vscsi->nb);
- if (err) {
- pr_err("registering cpu notifier failed\n");
+ err = virtscsi_cpu_notif_add(vscsi);
+ if (err)
goto scsi_add_host_failed;
- }
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
@@ -1049,7 +1057,7 @@
scsi_remove_host(shost);
- unregister_hotcpu_notifier(&vscsi->nb);
+ virtscsi_cpu_notif_remove(vscsi);
virtscsi_remove_vqs(vdev);
scsi_host_put(shost);
@@ -1061,7 +1069,7 @@
struct Scsi_Host *sh = virtio_scsi_host(vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
- unregister_hotcpu_notifier(&vscsi->nb);
+ virtscsi_cpu_notif_remove(vscsi);
virtscsi_remove_vqs(vdev);
return 0;
}
@@ -1076,12 +1084,11 @@
if (err)
return err;
- err = register_hotcpu_notifier(&vscsi->nb);
+ err = virtscsi_cpu_notif_add(vscsi);
if (err) {
vdev->config->del_vqs(vdev);
return err;
}
-
virtio_device_ready(vdev);
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1136,6 +1143,16 @@
pr_err("mempool_create() for virtscsi_cmd_pool failed\n");
goto error;
}
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "scsi/virtio:online",
+ virtscsi_cpu_online, NULL);
+ if (ret < 0)
+ goto error;
+ virtioscsi_online = ret;
+ ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead",
+ NULL, virtscsi_cpu_online);
+ if (ret)
+ goto error;
ret = register_virtio_driver(&virtio_scsi_driver);
if (ret < 0)
goto error;
@@ -1151,12 +1168,17 @@
kmem_cache_destroy(virtscsi_cmd_cache);
virtscsi_cmd_cache = NULL;
}
+ if (virtioscsi_online)
+ cpuhp_remove_multi_state(virtioscsi_online);
+ cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
return ret;
}
static void __exit fini(void)
{
unregister_virtio_driver(&virtio_scsi_driver);
+ cpuhp_remove_multi_state(virtioscsi_online);
+ cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
mempool_destroy(virtscsi_cmd_pool);
kmem_cache_destroy(virtscsi_cmd_cache);
}
diff --git a/drivers/soc/samsung/pm_domains.c b/drivers/soc/samsung/pm_domains.c
index 4822346..7112004 100644
--- a/drivers/soc/samsung/pm_domains.c
+++ b/drivers/soc/samsung/pm_domains.c
@@ -215,29 +215,22 @@
/* Assign the child power domains to their parents */
for_each_matching_node(np, exynos_pm_domain_of_match) {
- struct generic_pm_domain *child_domain, *parent_domain;
- struct of_phandle_args args;
+ struct of_phandle_args child, parent;
- args.np = np;
- args.args_count = 0;
- child_domain = of_genpd_get_from_provider(&args);
- if (IS_ERR(child_domain))
- continue;
+ child.np = np;
+ child.args_count = 0;
if (of_parse_phandle_with_args(np, "power-domains",
- "#power-domain-cells", 0, &args) != 0)
+ "#power-domain-cells", 0,
+ &parent) != 0)
continue;
- parent_domain = of_genpd_get_from_provider(&args);
- if (IS_ERR(parent_domain))
- continue;
-
- if (pm_genpd_add_subdomain(parent_domain, child_domain))
+ if (of_genpd_add_subdomain(&parent, &child))
pr_warn("%s failed to add subdomain: %s\n",
- parent_domain->name, child_domain->name);
+ parent.np->name, child.np->name);
else
pr_info("%s has as child subdomain: %s.\n",
- parent_domain->name, child_domain->name);
+ parent.np->name, child.np->name);
}
return 0;
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index aca282d..5ec3a59 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -954,6 +954,7 @@
if (channel > 5) {
dev_err(&pdev->dev, "invalid channel (%u) specified.\n",
channel);
+ err = -EINVAL;
goto err_put_ctrl;
}
diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c
index 45807d8..86dc411 100644
--- a/drivers/staging/board/board.c
+++ b/drivers/staging/board/board.c
@@ -140,7 +140,6 @@
const char *domain)
{
struct of_phandle_args pd_args;
- struct generic_pm_domain *pd;
struct device_node *np;
np = of_find_node_by_path(domain);
@@ -151,14 +150,8 @@
pd_args.np = np;
pd_args.args_count = 0;
- pd = of_genpd_get_from_provider(&pd_args);
- if (IS_ERR(pd)) {
- pr_err("Cannot find genpd %s (%ld)\n", domain, PTR_ERR(pd));
- return PTR_ERR(pd);
- }
- pr_debug("Found genpd %s for device %s\n", pd->name, pdev->name);
- return pm_genpd_add_device(pd, &pdev->dev);
+ return of_genpd_add_device(&pd_args, &pdev->dev);
}
#else
static inline int board_staging_add_dev_domain(struct platform_device *pdev,
diff --git a/drivers/staging/fsl-mc/bus/mc-msi.c b/drivers/staging/fsl-mc/bus/mc-msi.c
index c7be156..4fd8e41 100644
--- a/drivers/staging/fsl-mc/bus/mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/mc-msi.c
@@ -213,7 +213,7 @@
struct msi_desc *msi_desc;
for (i = 0; i < irq_count; i++) {
- msi_desc = alloc_msi_entry(dev);
+ msi_desc = alloc_msi_entry(dev, 1, NULL);
if (!msi_desc) {
dev_err(dev, "Failed to allocate msi entry\n");
error = -ENOMEM;
@@ -221,7 +221,6 @@
}
msi_desc->fsl_mc.msi_index = i;
- msi_desc->nvec_used = 1;
INIT_LIST_HEAD(&msi_desc->list);
list_add_tail(&msi_desc->list, dev_to_msi_list(dev));
}
diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
index a10d4f8..1322469 100644
--- a/drivers/staging/media/cec/TODO
+++ b/drivers/staging/media/cec/TODO
@@ -12,6 +12,7 @@
Other TODOs:
+- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that?
- Add a flag to inhibit passing CEC RC messages to the rc subsystem.
Applications should be able to choose this when calling S_LOG_ADDRS.
- If the reply field of cec_msg is set then when the reply arrives it
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index b2393bba..946986f 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -124,10 +124,10 @@
u64 ts = ktime_get_ns();
struct cec_fh *fh;
- mutex_lock(&adap->devnode.fhs_lock);
+ mutex_lock(&adap->devnode.lock);
list_for_each_entry(fh, &adap->devnode.fhs, list)
cec_queue_event_fh(fh, ev, ts);
- mutex_unlock(&adap->devnode.fhs_lock);
+ mutex_unlock(&adap->devnode.lock);
}
/*
@@ -191,12 +191,12 @@
u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
CEC_MODE_MONITOR_ALL;
- mutex_lock(&adap->devnode.fhs_lock);
+ mutex_lock(&adap->devnode.lock);
list_for_each_entry(fh, &adap->devnode.fhs, list) {
if (fh->mode_follower >= monitor_mode)
cec_queue_msg_fh(fh, msg);
}
- mutex_unlock(&adap->devnode.fhs_lock);
+ mutex_unlock(&adap->devnode.lock);
}
/*
@@ -207,12 +207,12 @@
{
struct cec_fh *fh;
- mutex_lock(&adap->devnode.fhs_lock);
+ mutex_lock(&adap->devnode.lock);
list_for_each_entry(fh, &adap->devnode.fhs, list) {
if (fh->mode_follower == CEC_MODE_FOLLOWER)
cec_queue_msg_fh(fh, msg);
}
- mutex_unlock(&adap->devnode.fhs_lock);
+ mutex_unlock(&adap->devnode.lock);
}
/* Notify userspace of an adapter state change. */
@@ -851,6 +851,9 @@
if (!valid_la || msg->len <= 1)
return;
+ if (adap->log_addrs.log_addr_mask == 0)
+ return;
+
/*
* Process the message on the protocol level. If is_reply is true,
* then cec_receive_notify() won't pass on the reply to the listener(s)
@@ -1047,11 +1050,17 @@
dprintk(1, "could not claim LA %d\n", i);
}
+ if (adap->log_addrs.log_addr_mask == 0 &&
+ !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK))
+ goto unconfigure;
+
configured:
if (adap->log_addrs.log_addr_mask == 0) {
/* Fall back to unregistered */
las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED;
las->log_addr_mask = 1 << las->log_addr[0];
+ for (i = 1; i < las->num_log_addrs; i++)
+ las->log_addr[i] = CEC_LOG_ADDR_INVALID;
}
adap->is_configured = true;
adap->is_configuring = false;
@@ -1070,6 +1079,8 @@
cec_report_features(adap, i);
cec_report_phys_addr(adap, i);
}
+ for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++)
+ las->log_addr[i] = CEC_LOG_ADDR_INVALID;
mutex_lock(&adap->lock);
adap->kthread_config = NULL;
mutex_unlock(&adap->lock);
@@ -1398,7 +1409,6 @@
u8 init_laddr = cec_msg_initiator(msg);
u8 devtype = cec_log_addr2dev(adap, dest_laddr);
int la_idx = cec_log_addr2idx(adap, dest_laddr);
- bool is_directed = la_idx >= 0;
bool from_unregistered = init_laddr == 0xf;
struct cec_msg tx_cec_msg = { };
@@ -1560,7 +1570,7 @@
* Unprocessed messages are aborted if userspace isn't doing
* any processing either.
*/
- if (is_directed && !is_reply && !adap->follower_cnt &&
+ if (!is_broadcast && !is_reply && !adap->follower_cnt &&
!adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT)
return cec_feature_abort(adap, msg);
break;
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
index 7be7615..e274e2f 100644
--- a/drivers/staging/media/cec/cec-api.c
+++ b/drivers/staging/media/cec/cec-api.c
@@ -162,7 +162,7 @@
return -ENOTTY;
if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
return -EFAULT;
- log_addrs.flags = 0;
+ log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK;
mutex_lock(&adap->lock);
if (!adap->is_configuring &&
(!log_addrs.num_log_addrs || !adap->is_configured) &&
@@ -435,7 +435,7 @@
void __user *parg = (void __user *)arg;
if (!devnode->registered)
- return -EIO;
+ return -ENODEV;
switch (cmd) {
case CEC_ADAP_G_CAPS:
@@ -508,14 +508,14 @@
filp->private_data = fh;
- mutex_lock(&devnode->fhs_lock);
+ mutex_lock(&devnode->lock);
/* Queue up initial state events */
ev_state.state_change.phys_addr = adap->phys_addr;
ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
cec_queue_event_fh(fh, &ev_state, 0);
list_add(&fh->list, &devnode->fhs);
- mutex_unlock(&devnode->fhs_lock);
+ mutex_unlock(&devnode->lock);
return 0;
}
@@ -540,9 +540,9 @@
cec_monitor_all_cnt_dec(adap);
mutex_unlock(&adap->lock);
- mutex_lock(&devnode->fhs_lock);
+ mutex_lock(&devnode->lock);
list_del(&fh->list);
- mutex_unlock(&devnode->fhs_lock);
+ mutex_unlock(&devnode->lock);
/* Unhook pending transmits from this filehandle. */
mutex_lock(&adap->lock);
diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c
index 112a5fa..3b1e4d2 100644
--- a/drivers/staging/media/cec/cec-core.c
+++ b/drivers/staging/media/cec/cec-core.c
@@ -51,31 +51,29 @@
{
/*
* Check if the cec device is available. This needs to be done with
- * the cec_devnode_lock held to prevent an open/unregister race:
+ * the devnode->lock held to prevent an open/unregister race:
* without the lock, the device could be unregistered and freed between
* the devnode->registered check and get_device() calls, leading to
* a crash.
*/
- mutex_lock(&cec_devnode_lock);
+ mutex_lock(&devnode->lock);
/*
* return ENXIO if the cec device has been removed
* already or if it is not registered anymore.
*/
if (!devnode->registered) {
- mutex_unlock(&cec_devnode_lock);
+ mutex_unlock(&devnode->lock);
return -ENXIO;
}
/* and increase the device refcount */
get_device(&devnode->dev);
- mutex_unlock(&cec_devnode_lock);
+ mutex_unlock(&devnode->lock);
return 0;
}
void cec_put_device(struct cec_devnode *devnode)
{
- mutex_lock(&cec_devnode_lock);
put_device(&devnode->dev);
- mutex_unlock(&cec_devnode_lock);
}
/* Called when the last user of the cec device exits. */
@@ -84,11 +82,10 @@
struct cec_devnode *devnode = to_cec_devnode(cd);
mutex_lock(&cec_devnode_lock);
-
/* Mark device node number as free */
clear_bit(devnode->minor, cec_devnode_nums);
-
mutex_unlock(&cec_devnode_lock);
+
cec_delete_adapter(to_cec_adapter(devnode));
}
@@ -117,7 +114,7 @@
/* Initialization */
INIT_LIST_HEAD(&devnode->fhs);
- mutex_init(&devnode->fhs_lock);
+ mutex_init(&devnode->lock);
/* Part 1: Find a free minor number */
mutex_lock(&cec_devnode_lock);
@@ -160,7 +157,9 @@
cdev_del:
cdev_del(&devnode->cdev);
clr_bit:
+ mutex_lock(&cec_devnode_lock);
clear_bit(devnode->minor, cec_devnode_nums);
+ mutex_unlock(&cec_devnode_lock);
return ret;
}
@@ -177,17 +176,21 @@
{
struct cec_fh *fh;
- /* Check if devnode was never registered or already unregistered */
- if (!devnode->registered || devnode->unregistered)
- return;
+ mutex_lock(&devnode->lock);
- mutex_lock(&devnode->fhs_lock);
+ /* Check if devnode was never registered or already unregistered */
+ if (!devnode->registered || devnode->unregistered) {
+ mutex_unlock(&devnode->lock);
+ return;
+ }
+
list_for_each_entry(fh, &devnode->fhs, list)
wake_up_interruptible(&fh->wait);
- mutex_unlock(&devnode->fhs_lock);
devnode->registered = false;
devnode->unregistered = true;
+ mutex_unlock(&devnode->lock);
+
device_del(&devnode->dev);
cdev_del(&devnode->cdev);
put_device(&devnode->dev);
diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c
index 94f8590..ed8bd95 100644
--- a/drivers/staging/media/pulse8-cec/pulse8-cec.c
+++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c
@@ -114,14 +114,11 @@
cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK,
0, 0, 0, 0);
break;
- case MSGCODE_TRANSMIT_FAILED_LINE:
- cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST,
- 1, 0, 0, 0);
- break;
case MSGCODE_TRANSMIT_FAILED_ACK:
cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK,
0, 1, 0, 0);
break;
+ case MSGCODE_TRANSMIT_FAILED_LINE:
case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA:
case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR,
@@ -170,6 +167,9 @@
case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
schedule_work(&pulse8->work);
break;
+ case MSGCODE_HIGH_ERROR:
+ case MSGCODE_LOW_ERROR:
+ case MSGCODE_RECEIVE_FAILED:
case MSGCODE_TIMEOUT_ERROR:
break;
case MSGCODE_COMMAND_ACCEPTED:
@@ -388,7 +388,7 @@
int err;
cmd[0] = MSGCODE_TRANSMIT_IDLETIME;
- cmd[1] = 3;
+ cmd[1] = signal_free_time;
err = pulse8_send_and_wait(pulse8, cmd, 2,
MSGCODE_COMMAND_ACCEPTED, 1);
cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY;
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 51e0d32..a23fa5e 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -800,7 +800,7 @@
return retval;
}
-static struct file_operations ptmx_fops;
+static struct file_operations ptmx_fops __ro_after_init;
static void __init unix98_pty_init(void)
{
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 1a16fea..a697a85 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -31,6 +31,11 @@
struct dma_chan *rxchan;
struct dma_chan *txchan;
+ /* Device address base for DMA operations */
+ phys_addr_t rx_dma_addr;
+ phys_addr_t tx_dma_addr;
+
+ /* DMA address of the buffer in memory */
dma_addr_t rx_addr;
dma_addr_t tx_addr;
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index dcf43f6..240a361 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -639,7 +639,7 @@
{
char match[] = "uart"; /* 8250-specific earlycon name */
unsigned char iotype;
- unsigned long addr;
+ resource_size_t addr;
int i;
if (strncmp(name, match, 4) != 0)
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 3590d01..fdbddbc 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -142,7 +142,7 @@
if (dma->rx_running) {
dmaengine_pause(dma->rxchan);
__dma_rx_complete(p);
- dmaengine_terminate_all(dma->rxchan);
+ dmaengine_terminate_async(dma->rxchan);
}
}
EXPORT_SYMBOL_GPL(serial8250_rx_dma_flush);
@@ -150,6 +150,10 @@
int serial8250_request_dma(struct uart_8250_port *p)
{
struct uart_8250_dma *dma = p->dma;
+ phys_addr_t rx_dma_addr = dma->rx_dma_addr ?
+ dma->rx_dma_addr : p->port.mapbase;
+ phys_addr_t tx_dma_addr = dma->tx_dma_addr ?
+ dma->tx_dma_addr : p->port.mapbase;
dma_cap_mask_t mask;
struct dma_slave_caps caps;
int ret;
@@ -157,11 +161,11 @@
/* Default slave configuration parameters */
dma->rxconf.direction = DMA_DEV_TO_MEM;
dma->rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
- dma->rxconf.src_addr = p->port.mapbase + UART_RX;
+ dma->rxconf.src_addr = rx_dma_addr + UART_RX;
dma->txconf.direction = DMA_MEM_TO_DEV;
dma->txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
- dma->txconf.dst_addr = p->port.mapbase + UART_TX;
+ dma->txconf.dst_addr = tx_dma_addr + UART_TX;
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
@@ -247,14 +251,14 @@
return;
/* Release RX resources */
- dmaengine_terminate_all(dma->rxchan);
+ dmaengine_terminate_sync(dma->rxchan);
dma_free_coherent(dma->rxchan->device->dev, dma->rx_size, dma->rx_buf,
dma->rx_addr);
dma_release_channel(dma->rxchan);
dma->rxchan = NULL;
/* Release TX resources */
- dmaengine_terminate_all(dma->txchan);
+ dmaengine_terminate_sync(dma->txchan);
dma_unmap_single(dma->txchan->device->dev, dma->tx_addr,
UART_XMIT_SIZE, DMA_TO_DEVICE);
dma_release_channel(dma->txchan);
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index e199696..459d726 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -298,12 +298,17 @@
p->serial_out = dw8250_serial_out32be;
}
} else if (has_acpi_companion(p->dev)) {
- p->iotype = UPIO_MEM32;
- p->regshift = 2;
- p->serial_in = dw8250_serial_in32;
+ const struct acpi_device_id *id;
+
+ id = acpi_match_device(p->dev->driver->acpi_match_table,
+ p->dev);
+ if (id && !strcmp(id->id, "APMC0D08")) {
+ p->iotype = UPIO_MEM32;
+ p->regshift = 2;
+ p->serial_in = dw8250_serial_in32;
+ data->uart_16550_compatible = true;
+ }
p->set_termios = dw8250_set_termios;
- /* So far none of there implement the Busy Functionality */
- data->uart_16550_compatible = true;
}
/* Platforms with iDMA */
@@ -360,18 +365,19 @@
struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
int irq = platform_get_irq(pdev, 0);
struct uart_port *p = &uart.port;
+ struct device *dev = &pdev->dev;
struct dw8250_data *data;
int err;
u32 val;
if (!regs) {
- dev_err(&pdev->dev, "no registers defined\n");
+ dev_err(dev, "no registers defined\n");
return -EINVAL;
}
if (irq < 0) {
if (irq != -EPROBE_DEFER)
- dev_err(&pdev->dev, "cannot get irq\n");
+ dev_err(dev, "cannot get irq\n");
return irq;
}
@@ -382,16 +388,16 @@
p->pm = dw8250_do_pm;
p->type = PORT_8250;
p->flags = UPF_SHARE_IRQ | UPF_FIXED_PORT;
- p->dev = &pdev->dev;
+ p->dev = dev;
p->iotype = UPIO_MEM;
p->serial_in = dw8250_serial_in;
p->serial_out = dw8250_serial_out;
- p->membase = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
+ p->membase = devm_ioremap(dev, regs->start, resource_size(regs));
if (!p->membase)
return -ENOMEM;
- data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -399,57 +405,57 @@
data->usr_reg = DW_UART_USR;
p->private_data = data;
- data->uart_16550_compatible = device_property_read_bool(p->dev,
+ data->uart_16550_compatible = device_property_read_bool(dev,
"snps,uart-16550-compatible");
- err = device_property_read_u32(p->dev, "reg-shift", &val);
+ err = device_property_read_u32(dev, "reg-shift", &val);
if (!err)
p->regshift = val;
- err = device_property_read_u32(p->dev, "reg-io-width", &val);
+ err = device_property_read_u32(dev, "reg-io-width", &val);
if (!err && val == 4) {
p->iotype = UPIO_MEM32;
p->serial_in = dw8250_serial_in32;
p->serial_out = dw8250_serial_out32;
}
- if (device_property_read_bool(p->dev, "dcd-override")) {
+ if (device_property_read_bool(dev, "dcd-override")) {
/* Always report DCD as active */
data->msr_mask_on |= UART_MSR_DCD;
data->msr_mask_off |= UART_MSR_DDCD;
}
- if (device_property_read_bool(p->dev, "dsr-override")) {
+ if (device_property_read_bool(dev, "dsr-override")) {
/* Always report DSR as active */
data->msr_mask_on |= UART_MSR_DSR;
data->msr_mask_off |= UART_MSR_DDSR;
}
- if (device_property_read_bool(p->dev, "cts-override")) {
+ if (device_property_read_bool(dev, "cts-override")) {
/* Always report CTS as active */
data->msr_mask_on |= UART_MSR_CTS;
data->msr_mask_off |= UART_MSR_DCTS;
}
- if (device_property_read_bool(p->dev, "ri-override")) {
+ if (device_property_read_bool(dev, "ri-override")) {
/* Always report Ring indicator as inactive */
data->msr_mask_off |= UART_MSR_RI;
data->msr_mask_off |= UART_MSR_TERI;
}
/* Always ask for fixed clock rate from a property. */
- device_property_read_u32(p->dev, "clock-frequency", &p->uartclk);
+ device_property_read_u32(dev, "clock-frequency", &p->uartclk);
/* If there is separate baudclk, get the rate from it. */
- data->clk = devm_clk_get(&pdev->dev, "baudclk");
+ data->clk = devm_clk_get(dev, "baudclk");
if (IS_ERR(data->clk) && PTR_ERR(data->clk) != -EPROBE_DEFER)
- data->clk = devm_clk_get(&pdev->dev, NULL);
+ data->clk = devm_clk_get(dev, NULL);
if (IS_ERR(data->clk) && PTR_ERR(data->clk) == -EPROBE_DEFER)
return -EPROBE_DEFER;
if (!IS_ERR_OR_NULL(data->clk)) {
err = clk_prepare_enable(data->clk);
if (err)
- dev_warn(&pdev->dev, "could not enable optional baudclk: %d\n",
+ dev_warn(dev, "could not enable optional baudclk: %d\n",
err);
else
p->uartclk = clk_get_rate(data->clk);
@@ -457,24 +463,24 @@
/* If no clock rate is defined, fail. */
if (!p->uartclk) {
- dev_err(&pdev->dev, "clock rate not defined\n");
+ dev_err(dev, "clock rate not defined\n");
return -EINVAL;
}
- data->pclk = devm_clk_get(&pdev->dev, "apb_pclk");
- if (IS_ERR(data->clk) && PTR_ERR(data->clk) == -EPROBE_DEFER) {
+ data->pclk = devm_clk_get(dev, "apb_pclk");
+ if (IS_ERR(data->pclk) && PTR_ERR(data->pclk) == -EPROBE_DEFER) {
err = -EPROBE_DEFER;
goto err_clk;
}
if (!IS_ERR(data->pclk)) {
err = clk_prepare_enable(data->pclk);
if (err) {
- dev_err(&pdev->dev, "could not enable apb_pclk\n");
+ dev_err(dev, "could not enable apb_pclk\n");
goto err_clk;
}
}
- data->rst = devm_reset_control_get_optional(&pdev->dev, NULL);
+ data->rst = devm_reset_control_get_optional(dev, NULL);
if (IS_ERR(data->rst) && PTR_ERR(data->rst) == -EPROBE_DEFER) {
err = -EPROBE_DEFER;
goto err_pclk;
@@ -506,8 +512,8 @@
platform_set_drvdata(pdev, data);
- pm_runtime_set_active(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
return 0;
@@ -619,6 +625,7 @@
{ "APMC0D08", 0},
{ "AMD0020", 0 },
{ "AMDI0020", 0 },
+ { "HISI0031", 0 },
{ },
};
MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match);
diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
new file mode 100644
index 0000000..886fcf3
--- /dev/null
+++ b/drivers/tty/serial/8250/8250_lpss.c
@@ -0,0 +1,378 @@
+/*
+ * 8250_lpss.c - Driver for UART on Intel Braswell and various other Intel SoCs
+ *
+ * Copyright (C) 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/rational.h>
+
+#include <linux/dmaengine.h>
+#include <linux/dma/dw.h>
+
+#include "8250.h"
+
+#define PCI_DEVICE_ID_INTEL_QRK_UARTx 0x0936
+
+#define PCI_DEVICE_ID_INTEL_BYT_UART1 0x0f0a
+#define PCI_DEVICE_ID_INTEL_BYT_UART2 0x0f0c
+
+#define PCI_DEVICE_ID_INTEL_BSW_UART1 0x228a
+#define PCI_DEVICE_ID_INTEL_BSW_UART2 0x228c
+
+#define PCI_DEVICE_ID_INTEL_BDW_UART1 0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2 0x9ce4
+
+/* Intel LPSS specific registers */
+
+#define BYT_PRV_CLK 0x800
+#define BYT_PRV_CLK_EN BIT(0)
+#define BYT_PRV_CLK_M_VAL_SHIFT 1
+#define BYT_PRV_CLK_N_VAL_SHIFT 16
+#define BYT_PRV_CLK_UPDATE BIT(31)
+
+#define BYT_TX_OVF_INT 0x820
+#define BYT_TX_OVF_INT_MASK BIT(1)
+
+struct lpss8250;
+
+struct lpss8250_board {
+ unsigned long freq;
+ unsigned int base_baud;
+ int (*setup)(struct lpss8250 *, struct uart_port *p);
+ void (*exit)(struct lpss8250 *);
+};
+
+struct lpss8250 {
+ int line;
+ struct lpss8250_board *board;
+
+ /* DMA parameters */
+ struct uart_8250_dma dma;
+ struct dw_dma_chip dma_chip;
+ struct dw_dma_slave dma_param;
+ u8 dma_maxburst;
+};
+
+static void byt_set_termios(struct uart_port *p, struct ktermios *termios,
+ struct ktermios *old)
+{
+ unsigned int baud = tty_termios_baud_rate(termios);
+ struct lpss8250 *lpss = p->private_data;
+ unsigned long fref = lpss->board->freq, fuart = baud * 16;
+ unsigned long w = BIT(15) - 1;
+ unsigned long m, n;
+ u32 reg;
+
+ /* Gracefully handle the B0 case: fall back to B9600 */
+ fuart = fuart ? fuart : 9600 * 16;
+
+ /* Get Fuart closer to Fref */
+ fuart *= rounddown_pow_of_two(fref / fuart);
+
+ /*
+ * For baud rates 0.5M, 1M, 1.5M, 2M, 2.5M, 3M, 3.5M and 4M the
+ * dividers must be adjusted.
+ *
+ * uartclk = (m / n) * 100 MHz, where m <= n
+ */
+ rational_best_approximation(fuart, fref, w, w, &m, &n);
+ p->uartclk = fuart;
+
+ /* Reset the clock */
+ reg = (m << BYT_PRV_CLK_M_VAL_SHIFT) | (n << BYT_PRV_CLK_N_VAL_SHIFT);
+ writel(reg, p->membase + BYT_PRV_CLK);
+ reg |= BYT_PRV_CLK_EN | BYT_PRV_CLK_UPDATE;
+ writel(reg, p->membase + BYT_PRV_CLK);
+
+ p->status &= ~UPSTAT_AUTOCTS;
+ if (termios->c_cflag & CRTSCTS)
+ p->status |= UPSTAT_AUTOCTS;
+
+ serial8250_do_set_termios(p, termios, old);
+}
+
+static unsigned int byt_get_mctrl(struct uart_port *port)
+{
+ unsigned int ret = serial8250_do_get_mctrl(port);
+
+ /* Force DCD and DSR signals to permanently be reported as active */
+ ret |= TIOCM_CAR | TIOCM_DSR;
+
+ return ret;
+}
+
+static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+{
+ struct dw_dma_slave *param = &lpss->dma_param;
+ struct uart_8250_port *up = up_to_u8250p(port);
+ struct pci_dev *pdev = to_pci_dev(port->dev);
+ unsigned int dma_devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+ struct pci_dev *dma_dev = pci_get_slot(pdev->bus, dma_devfn);
+
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_BYT_UART1:
+ case PCI_DEVICE_ID_INTEL_BSW_UART1:
+ case PCI_DEVICE_ID_INTEL_BDW_UART1:
+ param->src_id = 3;
+ param->dst_id = 2;
+ break;
+ case PCI_DEVICE_ID_INTEL_BYT_UART2:
+ case PCI_DEVICE_ID_INTEL_BSW_UART2:
+ case PCI_DEVICE_ID_INTEL_BDW_UART2:
+ param->src_id = 5;
+ param->dst_id = 4;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ param->dma_dev = &dma_dev->dev;
+ param->m_master = 0;
+ param->p_master = 1;
+
+ /* TODO: Detect FIFO size automaticaly for DesignWare 8250 */
+ port->fifosize = 64;
+ up->tx_loadsz = 64;
+
+ lpss->dma_maxburst = 16;
+
+ port->set_termios = byt_set_termios;
+ port->get_mctrl = byt_get_mctrl;
+
+ /* Disable TX counter interrupts */
+ writel(BYT_TX_OVF_INT_MASK, port->membase + BYT_TX_OVF_INT);
+
+ return 0;
+}
+
+#ifdef CONFIG_SERIAL_8250_DMA
+static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
+ .nr_channels = 2,
+ .is_private = true,
+ .is_nollp = true,
+ .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+ .chan_priority = CHAN_PRIORITY_ASCENDING,
+ .block_size = 4095,
+ .nr_masters = 1,
+ .data_width = {4},
+};
+
+static void qrk_serial_setup_dma(struct lpss8250 *lpss, struct uart_port *port)
+{
+ struct uart_8250_dma *dma = &lpss->dma;
+ struct dw_dma_chip *chip = &lpss->dma_chip;
+ struct dw_dma_slave *param = &lpss->dma_param;
+ struct pci_dev *pdev = to_pci_dev(port->dev);
+ int ret;
+
+ chip->dev = &pdev->dev;
+ chip->irq = pdev->irq;
+ chip->regs = pci_ioremap_bar(pdev, 1);
+ chip->pdata = &qrk_serial_dma_pdata;
+
+ /* Falling back to PIO mode if DMA probing fails */
+ ret = dw_dma_probe(chip);
+ if (ret)
+ return;
+
+ /* Special DMA address for UART */
+ dma->rx_dma_addr = 0xfffff000;
+ dma->tx_dma_addr = 0xfffff000;
+
+ param->dma_dev = &pdev->dev;
+ param->src_id = 0;
+ param->dst_id = 1;
+ param->hs_polarity = true;
+
+ lpss->dma_maxburst = 8;
+}
+
+static void qrk_serial_exit_dma(struct lpss8250 *lpss)
+{
+ struct dw_dma_slave *param = &lpss->dma_param;
+
+ if (!param->dma_dev)
+ return;
+ dw_dma_remove(&lpss->dma_chip);
+}
+#else /* CONFIG_SERIAL_8250_DMA */
+static void qrk_serial_setup_dma(struct lpss8250 *lpss, struct uart_port *port) {}
+static void qrk_serial_exit_dma(struct lpss8250 *lpss) {}
+#endif /* !CONFIG_SERIAL_8250_DMA */
+
+static int qrk_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+{
+ struct pci_dev *pdev = to_pci_dev(port->dev);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+ if (ret < 0)
+ return ret;
+
+ port->irq = pci_irq_vector(pdev, 0);
+
+ qrk_serial_setup_dma(lpss, port);
+ return 0;
+}
+
+static void qrk_serial_exit(struct lpss8250 *lpss)
+{
+ qrk_serial_exit_dma(lpss);
+}
+
+static bool lpss8250_dma_filter(struct dma_chan *chan, void *param)
+{
+ struct dw_dma_slave *dws = param;
+
+ if (dws->dma_dev != chan->device->dev)
+ return false;
+
+ chan->private = dws;
+ return true;
+}
+
+static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port)
+{
+ struct uart_8250_dma *dma = &lpss->dma;
+ struct dw_dma_slave *rx_param, *tx_param;
+ struct device *dev = port->port.dev;
+
+ if (!lpss->dma_param.dma_dev)
+ return 0;
+
+ rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL);
+ if (!rx_param)
+ return -ENOMEM;
+
+ tx_param = devm_kzalloc(dev, sizeof(*tx_param), GFP_KERNEL);
+ if (!tx_param)
+ return -ENOMEM;
+
+ *rx_param = lpss->dma_param;
+ dma->rxconf.src_maxburst = lpss->dma_maxburst;
+
+ *tx_param = lpss->dma_param;
+ dma->txconf.dst_maxburst = lpss->dma_maxburst;
+
+ dma->fn = lpss8250_dma_filter;
+ dma->rx_param = rx_param;
+ dma->tx_param = tx_param;
+
+ port->dma = dma;
+ return 0;
+}
+
+static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct uart_8250_port uart;
+ struct lpss8250 *lpss;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_master(pdev);
+
+ lpss = devm_kzalloc(&pdev->dev, sizeof(*lpss), GFP_KERNEL);
+ if (!lpss)
+ return -ENOMEM;
+
+ lpss->board = (struct lpss8250_board *)id->driver_data;
+
+ memset(&uart, 0, sizeof(struct uart_8250_port));
+
+ uart.port.dev = &pdev->dev;
+ uart.port.irq = pdev->irq;
+ uart.port.private_data = lpss;
+ uart.port.type = PORT_16550A;
+ uart.port.iotype = UPIO_MEM;
+ uart.port.regshift = 2;
+ uart.port.uartclk = lpss->board->base_baud * 16;
+ uart.port.flags = UPF_SHARE_IRQ | UPF_FIXED_PORT | UPF_FIXED_TYPE;
+ uart.capabilities = UART_CAP_FIFO | UART_CAP_AFE;
+ uart.port.mapbase = pci_resource_start(pdev, 0);
+ uart.port.membase = pcim_iomap(pdev, 0, 0);
+ if (!uart.port.membase)
+ return -ENOMEM;
+
+ ret = lpss->board->setup(lpss, &uart.port);
+ if (ret)
+ return ret;
+
+ ret = lpss8250_dma_setup(lpss, &uart);
+ if (ret)
+ goto err_exit;
+
+ ret = serial8250_register_8250_port(&uart);
+ if (ret < 0)
+ goto err_exit;
+
+ lpss->line = ret;
+
+ pci_set_drvdata(pdev, lpss);
+ return 0;
+
+err_exit:
+ if (lpss->board->exit)
+ lpss->board->exit(lpss);
+ return ret;
+}
+
+static void lpss8250_remove(struct pci_dev *pdev)
+{
+ struct lpss8250 *lpss = pci_get_drvdata(pdev);
+
+ if (lpss->board->exit)
+ lpss->board->exit(lpss);
+
+ serial8250_unregister_port(lpss->line);
+}
+
+static const struct lpss8250_board byt_board = {
+ .freq = 100000000,
+ .base_baud = 2764800,
+ .setup = byt_serial_setup,
+};
+
+static const struct lpss8250_board qrk_board = {
+ .freq = 44236800,
+ .base_baud = 2764800,
+ .setup = qrk_serial_setup,
+ .exit = qrk_serial_exit,
+};
+
+#define LPSS_DEVICE(id, board) { PCI_VDEVICE(INTEL, id), (kernel_ulong_t)&board }
+
+static const struct pci_device_id pci_ids[] = {
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_QRK_UARTx, qrk_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BYT_UART1, byt_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BYT_UART2, byt_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BSW_UART1, byt_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BSW_UART2, byt_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BDW_UART1, byt_board),
+ LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BDW_UART2, byt_board),
+ { },
+};
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+static struct pci_driver lpss8250_pci_driver = {
+ .name = "8250_lpss",
+ .id_table = pci_ids,
+ .probe = lpss8250_probe,
+ .remove = lpss8250_remove,
+};
+
+module_pci_driver(lpss8250_pci_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel LPSS UART driver");
diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c
index 20c5db2..39c2324 100644
--- a/drivers/tty/serial/8250/8250_mid.c
+++ b/drivers/tty/serial/8250/8250_mid.c
@@ -99,27 +99,27 @@
struct uart_8250_port *up = up_to_u8250p(p);
unsigned int fisr = serial_port_in(p, INTEL_MID_UART_DNV_FISR);
u32 status;
- int ret = IRQ_NONE;
+ int ret = 0;
int err;
if (fisr & BIT(2)) {
err = hsu_dma_get_status(&mid->dma_chip, 1, &status);
if (err > 0) {
serial8250_rx_dma_flush(up);
- ret |= IRQ_HANDLED;
+ ret |= 1;
} else if (err == 0)
ret |= hsu_dma_do_irq(&mid->dma_chip, 1, status);
}
if (fisr & BIT(1)) {
err = hsu_dma_get_status(&mid->dma_chip, 0, &status);
if (err > 0)
- ret |= IRQ_HANDLED;
+ ret |= 1;
else if (err == 0)
ret |= hsu_dma_do_irq(&mid->dma_chip, 0, status);
}
if (fisr & BIT(0))
ret |= serial8250_handle_irq(p, serial_port_in(p, UART_IIR));
- return ret;
+ return IRQ_RETVAL(ret);
}
#define DNV_DMA_CHAN_OFFSET 0x80
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index 3611ec9..ce0cc47 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -62,7 +62,7 @@
*/
baud = uart_get_baud_rate(port, termios, old,
port->uartclk / 16 / 0xffff,
- port->uartclk / 16);
+ port->uartclk);
if (baud <= 115200) {
serial_port_out(port, UART_MTK_HIGHS, 0x0);
@@ -76,10 +76,6 @@
quot = DIV_ROUND_UP(port->uartclk, 4 * baud);
} else {
serial_port_out(port, UART_MTK_HIGHS, 0x3);
-
- /* Set to highest baudrate supported */
- if (baud >= 1152000)
- baud = 921600;
quot = DIV_ROUND_UP(port->uartclk, 256 * baud);
}
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index 38963d7..7a8b5fc 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -195,6 +195,7 @@
switch (port_type) {
case PORT_8250 ... PORT_MAX_8250:
{
+ u32 tx_threshold;
struct uart_8250_port port8250;
memset(&port8250, 0, sizeof(port8250));
port8250.port = port;
@@ -202,6 +203,12 @@
if (port.fifosize)
port8250.capabilities = UART_CAP_FIFO;
+ /* Check for TX FIFO threshold & set tx_loadsz */
+ if ((of_property_read_u32(ofdev->dev.of_node, "tx-threshold",
+ &tx_threshold) == 0) &&
+ (tx_threshold < port.fifosize))
+ port8250.tx_loadsz = port.fifosize - tx_threshold;
+
if (of_property_read_bool(ofdev->dev.of_node,
"auto-flow-control"))
port8250.capabilities |= UART_CAP_AFE;
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index bc51b32..b98c157 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -21,14 +21,10 @@
#include <linux/serial_core.h>
#include <linux/8250_pci.h>
#include <linux/bitops.h>
-#include <linux/rational.h>
#include <asm/byteorder.h>
#include <asm/io.h>
-#include <linux/dmaengine.h>
-#include <linux/platform_data/dma-dw.h>
-
#include "8250.h"
/*
@@ -1349,160 +1345,6 @@
return ret;
}
-#define PCI_DEVICE_ID_INTEL_BYT_UART1 0x0f0a
-#define PCI_DEVICE_ID_INTEL_BYT_UART2 0x0f0c
-
-#define PCI_DEVICE_ID_INTEL_BSW_UART1 0x228a
-#define PCI_DEVICE_ID_INTEL_BSW_UART2 0x228c
-
-#define PCI_DEVICE_ID_INTEL_BDW_UART1 0x9ce3
-#define PCI_DEVICE_ID_INTEL_BDW_UART2 0x9ce4
-
-#define BYT_PRV_CLK 0x800
-#define BYT_PRV_CLK_EN (1 << 0)
-#define BYT_PRV_CLK_M_VAL_SHIFT 1
-#define BYT_PRV_CLK_N_VAL_SHIFT 16
-#define BYT_PRV_CLK_UPDATE (1 << 31)
-
-#define BYT_TX_OVF_INT 0x820
-#define BYT_TX_OVF_INT_MASK (1 << 1)
-
-static void
-byt_set_termios(struct uart_port *p, struct ktermios *termios,
- struct ktermios *old)
-{
- unsigned int baud = tty_termios_baud_rate(termios);
- unsigned long fref = 100000000, fuart = baud * 16;
- unsigned long w = BIT(15) - 1;
- unsigned long m, n;
- u32 reg;
-
- /* Gracefully handle the B0 case: fall back to B9600 */
- fuart = fuart ? fuart : 9600 * 16;
-
- /* Get Fuart closer to Fref */
- fuart *= rounddown_pow_of_two(fref / fuart);
-
- /*
- * For baud rates 0.5M, 1M, 1.5M, 2M, 2.5M, 3M, 3.5M and 4M the
- * dividers must be adjusted.
- *
- * uartclk = (m / n) * 100 MHz, where m <= n
- */
- rational_best_approximation(fuart, fref, w, w, &m, &n);
- p->uartclk = fuart;
-
- /* Reset the clock */
- reg = (m << BYT_PRV_CLK_M_VAL_SHIFT) | (n << BYT_PRV_CLK_N_VAL_SHIFT);
- writel(reg, p->membase + BYT_PRV_CLK);
- reg |= BYT_PRV_CLK_EN | BYT_PRV_CLK_UPDATE;
- writel(reg, p->membase + BYT_PRV_CLK);
-
- p->status &= ~UPSTAT_AUTOCTS;
- if (termios->c_cflag & CRTSCTS)
- p->status |= UPSTAT_AUTOCTS;
-
- serial8250_do_set_termios(p, termios, old);
-}
-
-static bool byt_dma_filter(struct dma_chan *chan, void *param)
-{
- struct dw_dma_slave *dws = param;
-
- if (dws->dma_dev != chan->device->dev)
- return false;
-
- chan->private = dws;
- return true;
-}
-
-static unsigned int
-byt_get_mctrl(struct uart_port *port)
-{
- unsigned int ret = serial8250_do_get_mctrl(port);
-
- /* Force DCD and DSR signals to permanently be reported as active. */
- ret |= TIOCM_CAR | TIOCM_DSR;
-
- return ret;
-}
-
-static int
-byt_serial_setup(struct serial_private *priv,
- const struct pciserial_board *board,
- struct uart_8250_port *port, int idx)
-{
- struct pci_dev *pdev = priv->dev;
- struct device *dev = port->port.dev;
- struct uart_8250_dma *dma;
- struct dw_dma_slave *tx_param, *rx_param;
- struct pci_dev *dma_dev;
- int ret;
-
- dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
- if (!dma)
- return -ENOMEM;
-
- tx_param = devm_kzalloc(dev, sizeof(*tx_param), GFP_KERNEL);
- if (!tx_param)
- return -ENOMEM;
-
- rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL);
- if (!rx_param)
- return -ENOMEM;
-
- switch (pdev->device) {
- case PCI_DEVICE_ID_INTEL_BYT_UART1:
- case PCI_DEVICE_ID_INTEL_BSW_UART1:
- case PCI_DEVICE_ID_INTEL_BDW_UART1:
- rx_param->src_id = 3;
- tx_param->dst_id = 2;
- break;
- case PCI_DEVICE_ID_INTEL_BYT_UART2:
- case PCI_DEVICE_ID_INTEL_BSW_UART2:
- case PCI_DEVICE_ID_INTEL_BDW_UART2:
- rx_param->src_id = 5;
- tx_param->dst_id = 4;
- break;
- default:
- return -EINVAL;
- }
-
- rx_param->m_master = 0;
- rx_param->p_master = 1;
-
- dma->rxconf.src_maxburst = 16;
-
- tx_param->m_master = 0;
- tx_param->p_master = 1;
-
- dma->txconf.dst_maxburst = 16;
-
- dma_dev = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
- rx_param->dma_dev = &dma_dev->dev;
- tx_param->dma_dev = &dma_dev->dev;
-
- dma->fn = byt_dma_filter;
- dma->rx_param = rx_param;
- dma->tx_param = tx_param;
-
- ret = pci_default_setup(priv, board, port, idx);
- port->port.iotype = UPIO_MEM;
- port->port.type = PORT_16550A;
- port->port.flags = (port->port.flags | UPF_FIXED_PORT | UPF_FIXED_TYPE);
- port->port.set_termios = byt_set_termios;
- port->port.get_mctrl = byt_get_mctrl;
- port->port.fifosize = 64;
- port->tx_loadsz = 64;
- port->dma = dma;
- port->capabilities = UART_CAP_FIFO | UART_CAP_AFE;
-
- /* Disable Tx counter interrupts */
- writel(BYT_TX_OVF_INT_MASK, port->port.membase + BYT_TX_OVF_INT);
-
- return ret;
-}
-
static int
pci_omegapci_setup(struct serial_private *priv,
const struct pciserial_board *board,
@@ -1741,6 +1583,19 @@
#define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358
#define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358
+#define UART_EXAR_MPIOINT_7_0 0x8f /* MPIOINT[7:0] */
+#define UART_EXAR_MPIOLVL_7_0 0x90 /* MPIOLVL[7:0] */
+#define UART_EXAR_MPIO3T_7_0 0x91 /* MPIO3T[7:0] */
+#define UART_EXAR_MPIOINV_7_0 0x92 /* MPIOINV[7:0] */
+#define UART_EXAR_MPIOSEL_7_0 0x93 /* MPIOSEL[7:0] */
+#define UART_EXAR_MPIOOD_7_0 0x94 /* MPIOOD[7:0] */
+#define UART_EXAR_MPIOINT_15_8 0x95 /* MPIOINT[15:8] */
+#define UART_EXAR_MPIOLVL_15_8 0x96 /* MPIOLVL[15:8] */
+#define UART_EXAR_MPIO3T_15_8 0x97 /* MPIO3T[15:8] */
+#define UART_EXAR_MPIOINV_15_8 0x98 /* MPIOINV[15:8] */
+#define UART_EXAR_MPIOSEL_15_8 0x99 /* MPIOSEL[15:8] */
+#define UART_EXAR_MPIOOD_15_8 0x9a /* MPIOOD[15:8] */
+
static int
pci_xr17c154_setup(struct serial_private *priv,
const struct pciserial_board *board,
@@ -1783,18 +1638,18 @@
* Setup Multipurpose Input/Output pins.
*/
if (idx == 0) {
- writeb(0x00, p + 0x8f); /*MPIOINT[7:0]*/
- writeb(0x00, p + 0x90); /*MPIOLVL[7:0]*/
- writeb(0x00, p + 0x91); /*MPIO3T[7:0]*/
- writeb(0x00, p + 0x92); /*MPIOINV[7:0]*/
- writeb(0x00, p + 0x93); /*MPIOSEL[7:0]*/
- writeb(0x00, p + 0x94); /*MPIOOD[7:0]*/
- writeb(0x00, p + 0x95); /*MPIOINT[15:8]*/
- writeb(0x00, p + 0x96); /*MPIOLVL[15:8]*/
- writeb(0x00, p + 0x97); /*MPIO3T[15:8]*/
- writeb(0x00, p + 0x98); /*MPIOINV[15:8]*/
- writeb(0x00, p + 0x99); /*MPIOSEL[15:8]*/
- writeb(0x00, p + 0x9a); /*MPIOOD[15:8]*/
+ writeb(0x00, p + UART_EXAR_MPIOINT_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOLVL_7_0);
+ writeb(0x00, p + UART_EXAR_MPIO3T_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOINV_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOSEL_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOOD_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOINT_15_8);
+ writeb(0x00, p + UART_EXAR_MPIOLVL_15_8);
+ writeb(0x00, p + UART_EXAR_MPIO3T_15_8);
+ writeb(0x00, p + UART_EXAR_MPIOINV_15_8);
+ writeb(0x00, p + UART_EXAR_MPIOSEL_15_8);
+ writeb(0x00, p + UART_EXAR_MPIOOD_15_8);
}
writeb(0x00, p + UART_EXAR_8XMODE);
writeb(UART_FCTR_EXAR_TRGD, p + UART_EXAR_FCTR);
@@ -1830,20 +1685,20 @@
switch (priv->dev->device) {
case PCI_DEVICE_ID_COMMTECH_4222PCI335:
case PCI_DEVICE_ID_COMMTECH_4224PCI335:
- writeb(0x78, p + 0x90); /* MPIOLVL[7:0] */
- writeb(0x00, p + 0x92); /* MPIOINV[7:0] */
- writeb(0x00, p + 0x93); /* MPIOSEL[7:0] */
+ writeb(0x78, p + UART_EXAR_MPIOLVL_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOINV_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOSEL_7_0);
break;
case PCI_DEVICE_ID_COMMTECH_2324PCI335:
case PCI_DEVICE_ID_COMMTECH_2328PCI335:
- writeb(0x00, p + 0x90); /* MPIOLVL[7:0] */
- writeb(0xc0, p + 0x92); /* MPIOINV[7:0] */
- writeb(0xc0, p + 0x93); /* MPIOSEL[7:0] */
+ writeb(0x00, p + UART_EXAR_MPIOLVL_7_0);
+ writeb(0xc0, p + UART_EXAR_MPIOINV_7_0);
+ writeb(0xc0, p + UART_EXAR_MPIOSEL_7_0);
break;
}
- writeb(0x00, p + 0x8f); /* MPIOINT[7:0] */
- writeb(0x00, p + 0x91); /* MPIO3T[7:0] */
- writeb(0x00, p + 0x94); /* MPIOOD[7:0] */
+ writeb(0x00, p + UART_EXAR_MPIOINT_7_0);
+ writeb(0x00, p + UART_EXAR_MPIO3T_7_0);
+ writeb(0x00, p + UART_EXAR_MPIOOD_7_0);
}
writeb(0x00, p + UART_EXAR_8XMODE);
writeb(UART_FCTR_EXAR_TRGD, p + UART_EXAR_FCTR);
@@ -1934,7 +1789,6 @@
#define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022
#define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a
#define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e
-#define PCI_DEVICE_ID_INTEL_QRK_UART 0x0936
#define PCI_VENDOR_ID_SUNIX 0x1fd4
#define PCI_DEVICE_ID_SUNIX_1999 0x1999
@@ -2078,48 +1932,6 @@
.subdevice = PCI_ANY_ID,
.setup = kt_serial_setup,
},
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BYT_UART1,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BYT_UART2,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BSW_UART1,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BSW_UART2,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BDW_UART1,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
- {
- .vendor = PCI_VENDOR_ID_INTEL,
- .device = PCI_DEVICE_ID_INTEL_BDW_UART2,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .setup = byt_serial_setup,
- },
/*
* ITE
*/
@@ -2992,8 +2804,6 @@
pbn_ADDIDATA_PCIe_4_3906250,
pbn_ADDIDATA_PCIe_8_3906250,
pbn_ce4100_1_115200,
- pbn_byt,
- pbn_qrk,
pbn_omegapci,
pbn_NETMOS9900_2s_115200,
pbn_brcm_trumanage,
@@ -3769,18 +3579,6 @@
.base_baud = 921600,
.reg_shift = 2,
},
- [pbn_byt] = {
- .flags = FL_BASE0,
- .num_ports = 1,
- .base_baud = 2764800,
- .reg_shift = 2,
- },
- [pbn_qrk] = {
- .flags = FL_BASE0,
- .num_ports = 1,
- .base_baud = 2764800,
- .reg_shift = 2,
- },
[pbn_omegapci] = {
.flags = FL_BASE0,
.num_ports = 8,
@@ -3892,6 +3690,15 @@
{ PCI_VDEVICE(INTEL, 0x081d), },
{ PCI_VDEVICE(INTEL, 0x1191), },
{ PCI_VDEVICE(INTEL, 0x19d8), },
+
+ /* Intel platforms with DesignWare UART */
+ { PCI_VDEVICE(INTEL, 0x0936), },
+ { PCI_VDEVICE(INTEL, 0x0f0a), },
+ { PCI_VDEVICE(INTEL, 0x0f0c), },
+ { PCI_VDEVICE(INTEL, 0x228a), },
+ { PCI_VDEVICE(INTEL, 0x228c), },
+ { PCI_VDEVICE(INTEL, 0x9ce3), },
+ { PCI_VDEVICE(INTEL, 0x9ce4), },
};
/*
@@ -5659,40 +5466,7 @@
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100_UART,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
pbn_ce4100_1_115200 },
- /* Intel BayTrail */
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT_UART1,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT_UART2,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW_UART1,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW_UART2,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
- /* Intel Broadwell */
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
- PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
- pbn_byt },
-
- /*
- * Intel Quark x1000
- */
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_QRK_UART,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_qrk },
/*
* Cronyx Omega PCI
*/
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index bdfa659..1bfb6fd 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -178,7 +178,7 @@
.fifo_size = 16,
.tx_loadsz = 16,
.fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
- .flags = UART_CAP_FIFO | UART_CAP_AFE,
+ .flags = UART_CAP_FIFO /* | UART_CAP_AFE */,
},
[PORT_U6_16550A] = {
.name = "U6_16550A",
@@ -585,11 +585,11 @@
*/
int serial8250_em485_init(struct uart_8250_port *p)
{
- if (p->em485 != NULL)
+ if (p->em485)
return 0;
p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC);
- if (p->em485 == NULL)
+ if (!p->em485)
return -ENOMEM;
setup_timer(&p->em485->stop_tx_timer,
@@ -619,7 +619,7 @@
*/
void serial8250_em485_destroy(struct uart_8250_port *p)
{
- if (p->em485 == NULL)
+ if (!p->em485)
return;
del_timer(&p->em485->start_tx_timer);
@@ -1402,10 +1402,8 @@
static void __do_stop_tx_rs485(struct uart_8250_port *p)
{
- if (!p->em485)
- return;
-
serial8250_em485_rts_after_send(p);
+
/*
* Empty the RX FIFO, we are not interested in anything
* received during the half-duplex transmission.
@@ -1414,12 +1412,8 @@
if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) {
serial8250_clear_fifos(p);
- serial8250_rpm_get(p);
-
p->ier |= UART_IER_RLSI | UART_IER_RDI;
serial_port_out(&p->port, UART_IER, p->ier);
-
- serial8250_rpm_put(p);
}
}
@@ -1429,6 +1423,7 @@
struct uart_8250_em485 *em485 = p->em485;
unsigned long flags;
+ serial8250_rpm_get(p);
spin_lock_irqsave(&p->port.lock, flags);
if (em485 &&
em485->active_timer == &em485->stop_tx_timer) {
@@ -1436,15 +1431,13 @@
em485->active_timer = NULL;
}
spin_unlock_irqrestore(&p->port.lock, flags);
+ serial8250_rpm_put(p);
}
static void __stop_tx_rs485(struct uart_8250_port *p)
{
struct uart_8250_em485 *em485 = p->em485;
- if (!em485)
- return;
-
/*
* __do_stop_tx_rs485 is going to set RTS according to config
* AND flush RX FIFO if required.
@@ -1475,7 +1468,7 @@
unsigned char lsr = serial_in(p, UART_LSR);
/*
* To provide required timeing and allow FIFO transfer,
- * __stop_tx_rs485 must be called only when both FIFO and
+ * __stop_tx_rs485() must be called only when both FIFO and
* shift register are empty. It is for device driver to enable
* interrupt on TEMT.
*/
@@ -1484,9 +1477,10 @@
del_timer(&em485->start_tx_timer);
em485->active_timer = NULL;
+
+ __stop_tx_rs485(p);
}
__do_stop_tx(p);
- __stop_tx_rs485(p);
}
static void serial8250_stop_tx(struct uart_port *port)
@@ -1876,6 +1870,30 @@
return ret;
}
+/*
+ * Newer 16550 compatible parts such as the SC16C650 & Altera 16550 Soft IP
+ * have a programmable TX threshold that triggers the THRE interrupt in
+ * the IIR register. In this case, the THRE interrupt indicates the FIFO
+ * has space available. Load it up with tx_loadsz bytes.
+ */
+static int serial8250_tx_threshold_handle_irq(struct uart_port *port)
+{
+ unsigned long flags;
+ unsigned int iir = serial_port_in(port, UART_IIR);
+
+ /* TX Threshold IRQ triggered so load up FIFO */
+ if ((iir & UART_IIR_ID) == UART_IIR_THRI) {
+ struct uart_8250_port *up = up_to_u8250p(port);
+
+ spin_lock_irqsave(&port->lock, flags);
+ serial8250_tx_chars(up);
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+ iir = serial_port_in(port, UART_IIR);
+ return serial8250_handle_irq(port, iir);
+}
+
static unsigned int serial8250_tx_empty(struct uart_port *port)
{
struct uart_8250_port *up = up_to_u8250p(port);
@@ -1988,6 +2006,7 @@
if (--tmout == 0)
break;
udelay(1);
+ touch_nmi_watchdog();
}
/* Wait up to 1s for flow control if necessary */
@@ -2164,6 +2183,25 @@
serial_port_out(port, UART_LCR, 0);
}
+ /*
+ * For the Altera 16550 variants, set TX threshold trigger level.
+ */
+ if (((port->type == PORT_ALTR_16550_F32) ||
+ (port->type == PORT_ALTR_16550_F64) ||
+ (port->type == PORT_ALTR_16550_F128)) && (port->fifosize > 1)) {
+ /* Bounds checking of TX threshold (valid 0 to fifosize-2) */
+ if ((up->tx_loadsz < 2) || (up->tx_loadsz > port->fifosize)) {
+ pr_err("ttyS%d TX FIFO Threshold errors, skipping\n",
+ serial_index(port));
+ } else {
+ serial_port_out(port, UART_ALTR_AFR,
+ UART_ALTR_EN_TXFIFO_LW);
+ serial_port_out(port, UART_ALTR_TX_LOW,
+ port->fifosize - up->tx_loadsz);
+ port->handle_irq = serial8250_tx_threshold_handle_irq;
+ }
+ }
+
if (port->irq) {
unsigned char iir1;
/*
@@ -2499,8 +2537,6 @@
struct ktermios *termios,
struct ktermios *old)
{
- unsigned int tolerance = port->uartclk / 100;
-
/*
* Ask the core to calculate the divisor for us.
* Allow 1% tolerance at the upper limit so uart clks marginally
@@ -2509,7 +2545,7 @@
*/
return uart_get_baud_rate(port, termios, old,
port->uartclk / 16 / 0xffff,
- (port->uartclk + tolerance) / 16);
+ port->uartclk);
}
void
@@ -2546,12 +2582,9 @@
/*
* MCR-based auto flow control. When AFE is enabled, RTS will be
* deasserted when the receive FIFO contains more characters than
- * the trigger, or the MCR RTS bit is cleared. In the case where
- * the remote UART is not using CTS auto flow control, we must
- * have sufficient FIFO entries for the latency of the remote
- * UART to respond. IOW, at least 32 bytes of FIFO.
+ * the trigger, or the MCR RTS bit is cleared.
*/
- if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) {
+ if (up->capabilities & UART_CAP_AFE) {
up->mcr &= ~UART_MCR_AFE;
if (termios->c_cflag & CRTSCTS)
up->mcr |= UART_MCR_AFE;
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 7c6f7af..8998347 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -120,7 +120,6 @@
tristate "8250/16550 PCI device support" if EXPERT
depends on SERIAL_8250 && PCI
default SERIAL_8250
- select RATIONAL
help
This builds standard PCI serial support. You may be able to
disable this feature if you only need legacy serial support.
@@ -402,6 +401,21 @@
If you have a system using an Ingenic SoC and wish to make use of
its UARTs, say Y to this option. If unsure, say N.
+config SERIAL_8250_LPSS
+ tristate "Support for serial ports on Intel LPSS platforms" if EXPERT
+ default SERIAL_8250
+ depends on SERIAL_8250 && PCI
+ depends on X86 || COMPILE_TEST
+ select DW_DMAC_CORE if SERIAL_8250_DMA
+ select DW_DMAC_PCI if (SERIAL_8250_DMA && X86_INTEL_LPSS)
+ select RATIONAL
+ help
+ Selecting this option will enable handling of the extra features
+ present on the UART found on various Intel platforms such as:
+ - Intel Baytrail SoC
+ - Intel Braswell SoC
+ - Intel Quark X1000 SoC
+
config SERIAL_8250_MID
tristate "Support for serial ports on Intel MID platforms" if EXPERT
default SERIAL_8250
diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile
index 367d403..276c6fb 100644
--- a/drivers/tty/serial/8250/Makefile
+++ b/drivers/tty/serial/8250/Makefile
@@ -28,6 +28,7 @@
obj-$(CONFIG_SERIAL_8250_MT6577) += 8250_mtk.o
obj-$(CONFIG_SERIAL_8250_UNIPHIER) += 8250_uniphier.o
obj-$(CONFIG_SERIAL_8250_INGENIC) += 8250_ingenic.o
+obj-$(CONFIG_SERIAL_8250_LPSS) += 8250_lpss.o
obj-$(CONFIG_SERIAL_8250_MID) += 8250_mid.o
obj-$(CONFIG_SERIAL_8250_MOXA) += 8250_moxa.o
obj-$(CONFIG_SERIAL_OF_PLATFORM) += 8250_of.o
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 518db24a..c783140 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1380,7 +1380,7 @@
config SERIAL_PCH_UART
tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) UART"
- depends on PCI && (X86_32 || COMPILE_TEST)
+ depends on PCI && (X86_32 || MIPS || COMPILE_TEST)
select SERIAL_CORE
help
This driver is for PCH(Platform controller Hub) UART of Intel EG20T
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 32df2a0..e409d7d 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -280,7 +280,7 @@
/*
* Define the basic serial functions we support.
*/
-static struct uart_ops altera_jtaguart_ops = {
+static const struct uart_ops altera_jtaguart_ops = {
.tx_empty = altera_jtaguart_tx_empty,
.get_mctrl = altera_jtaguart_get_mctrl,
.set_mctrl = altera_jtaguart_set_mctrl,
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 61b607f..820a742 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -404,7 +404,7 @@
/*
* Define the basic serial functions we support.
*/
-static struct uart_ops altera_uart_ops = {
+static const struct uart_ops altera_uart_ops = {
.tx_empty = altera_uart_tx_empty,
.get_mctrl = altera_uart_get_mctrl,
.set_mctrl = altera_uart_set_mctrl,
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8a9e213..e2c33b9 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -93,6 +93,10 @@
struct vendor_data {
const u16 *reg_offset;
unsigned int ifls;
+ unsigned int fr_busy;
+ unsigned int fr_dsr;
+ unsigned int fr_cts;
+ unsigned int fr_ri;
bool access_32b;
bool oversampling;
bool dma_threshold;
@@ -111,6 +115,10 @@
static struct vendor_data vendor_arm = {
.reg_offset = pl011_std_offsets,
.ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
+ .fr_busy = UART01x_FR_BUSY,
+ .fr_dsr = UART01x_FR_DSR,
+ .fr_cts = UART01x_FR_CTS,
+ .fr_ri = UART011_FR_RI,
.oversampling = false,
.dma_threshold = false,
.cts_event_workaround = false,
@@ -121,6 +129,10 @@
static struct vendor_data vendor_sbsa = {
.reg_offset = pl011_std_offsets,
+ .fr_busy = UART01x_FR_BUSY,
+ .fr_dsr = UART01x_FR_DSR,
+ .fr_cts = UART01x_FR_CTS,
+ .fr_ri = UART011_FR_RI,
.access_32b = true,
.oversampling = false,
.dma_threshold = false,
@@ -164,6 +176,10 @@
static struct vendor_data vendor_st = {
.reg_offset = pl011_st_offsets,
.ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
+ .fr_busy = UART01x_FR_BUSY,
+ .fr_dsr = UART01x_FR_DSR,
+ .fr_cts = UART01x_FR_CTS,
+ .fr_ri = UART011_FR_RI,
.oversampling = true,
.dma_threshold = true,
.cts_event_workaround = true,
@@ -188,11 +204,20 @@
[REG_DMACR] = ZX_UART011_DMACR,
};
-static struct vendor_data vendor_zte __maybe_unused = {
+static unsigned int get_fifosize_zte(struct amba_device *dev)
+{
+ return 16;
+}
+
+static struct vendor_data vendor_zte = {
.reg_offset = pl011_zte_offsets,
.access_32b = true,
.ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
- .get_fifosize = get_fifosize_arm,
+ .fr_busy = ZX_UART01x_FR_BUSY,
+ .fr_dsr = ZX_UART01x_FR_DSR,
+ .fr_cts = ZX_UART01x_FR_CTS,
+ .fr_ri = ZX_UART011_FR_RI,
+ .get_fifosize = get_fifosize_zte,
};
/* Deals with DMA transactions */
@@ -1167,7 +1192,7 @@
return;
/* Disable RX and TX DMA */
- while (pl011_read(uap, REG_FR) & UART01x_FR_BUSY)
+ while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy)
cpu_relax();
spin_lock_irq(&uap->port.lock);
@@ -1416,11 +1441,12 @@
if (delta & UART01x_FR_DCD)
uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD);
- if (delta & UART01x_FR_DSR)
+ if (delta & uap->vendor->fr_dsr)
uap->port.icount.dsr++;
- if (delta & UART01x_FR_CTS)
- uart_handle_cts_change(&uap->port, status & UART01x_FR_CTS);
+ if (delta & uap->vendor->fr_cts)
+ uart_handle_cts_change(&uap->port,
+ status & uap->vendor->fr_cts);
wake_up_interruptible(&uap->port.state->port.delta_msr_wait);
}
@@ -1493,7 +1519,8 @@
struct uart_amba_port *uap =
container_of(port, struct uart_amba_port, port);
unsigned int status = pl011_read(uap, REG_FR);
- return status & (UART01x_FR_BUSY|UART01x_FR_TXFF) ? 0 : TIOCSER_TEMT;
+ return status & (uap->vendor->fr_busy | UART01x_FR_TXFF) ?
+ 0 : TIOCSER_TEMT;
}
static unsigned int pl011_get_mctrl(struct uart_port *port)
@@ -1508,9 +1535,9 @@
result |= tiocmbit
TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
- TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
- TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
- TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
+ TIOCMBIT(uap->vendor->fr_dsr, TIOCM_DSR);
+ TIOCMBIT(uap->vendor->fr_cts, TIOCM_CTS);
+ TIOCMBIT(uap->vendor->fr_ri, TIOCM_RNG);
#undef TIOCMBIT
return result;
}
@@ -2191,7 +2218,7 @@
* Finally, wait for transmitter to become empty
* and restore the TCR
*/
- while (pl011_read(uap, REG_FR) & UART01x_FR_BUSY)
+ while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy)
cpu_relax();
if (!uap->vendor->always_enabled)
pl011_write(old_cr, uap, REG_CR);
@@ -2555,7 +2582,8 @@
ret = platform_get_irq(pdev, 0);
if (ret < 0) {
- dev_err(&pdev->dev, "cannot obtain irq\n");
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev, "cannot obtain irq\n");
return ret;
}
uap->port.irq = ret;
@@ -2622,6 +2650,11 @@
.mask = 0x00ffffff,
.data = &vendor_st,
},
+ {
+ .id = AMBA_LINUX_ID(0x00, 0x1, 0xffe),
+ .mask = 0x00ffffff,
+ .data = &vendor_zte,
+ },
{ 0, 0 },
};
diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index 3a1de5c..5ac06fc 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -464,7 +464,7 @@
}
#endif
-static struct uart_ops arc_serial_pops = {
+static const struct uart_ops arc_serial_pops = {
.tx_empty = arc_serial_tx_empty,
.set_mctrl = arc_serial_set_mctrl,
.get_mctrl = arc_serial_get_mctrl,
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 2eaa18d..fd8aa1f 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -166,6 +166,7 @@
u32 rts_low;
bool ms_irq_enabled;
u32 rtor; /* address of receiver timeout register if it exists */
+ bool has_frac_baudrate;
bool has_hw_timer;
struct timer_list uart_timer;
@@ -1634,8 +1635,8 @@
if (np) {
/* DMA/PDC usage specification */
- if (of_get_property(np, "atmel,use-dma-rx", NULL)) {
- if (of_get_property(np, "dmas", NULL)) {
+ if (of_property_read_bool(np, "atmel,use-dma-rx")) {
+ if (of_property_read_bool(np, "dmas")) {
atmel_port->use_dma_rx = true;
atmel_port->use_pdc_rx = false;
} else {
@@ -1647,8 +1648,8 @@
atmel_port->use_pdc_rx = false;
}
- if (of_get_property(np, "atmel,use-dma-tx", NULL)) {
- if (of_get_property(np, "dmas", NULL)) {
+ if (of_property_read_bool(np, "atmel,use-dma-tx")) {
+ if (of_property_read_bool(np, "dmas")) {
atmel_port->use_dma_tx = true;
atmel_port->use_pdc_tx = false;
} else {
@@ -1745,6 +1746,11 @@
dbgu_uart = 0x44424755; /* DBGU */
new_uart = 0x55415254; /* UART */
+ /*
+ * Only USART devices from at91sam9260 SOC implement fractional
+ * baudrate.
+ */
+ atmel_port->has_frac_baudrate = false;
atmel_port->has_hw_timer = false;
if (name == new_uart) {
@@ -1753,6 +1759,7 @@
atmel_port->rtor = ATMEL_UA_RTOR;
} else if (name == usart) {
dev_dbg(port->dev, "Usart\n");
+ atmel_port->has_frac_baudrate = true;
atmel_port->has_hw_timer = true;
atmel_port->rtor = ATMEL_US_RTOR;
} else if (name == dbgu_uart) {
@@ -1764,6 +1771,7 @@
case 0x302:
case 0x10213:
dev_dbg(port->dev, "This version is usart\n");
+ atmel_port->has_frac_baudrate = true;
atmel_port->has_hw_timer = true;
atmel_port->rtor = ATMEL_US_RTOR;
break;
@@ -1929,6 +1937,9 @@
{
struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+ /* Disable modem control lines interrupts */
+ atmel_disable_ms(port);
+
/* Disable interrupts at device level */
atmel_uart_writel(port, ATMEL_US_IDR, -1);
@@ -1979,8 +1990,6 @@
*/
free_irq(port->irq, port);
- atmel_port->ms_irq_enabled = false;
-
atmel_flush_buffer(port);
}
@@ -2025,8 +2034,9 @@
static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
struct ktermios *old)
{
+ struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
unsigned long flags;
- unsigned int old_mode, mode, imr, quot, baud;
+ unsigned int old_mode, mode, imr, quot, baud, div, cd, fp = 0;
/* save the current mode register */
mode = old_mode = atmel_uart_readl(port, ATMEL_US_MR);
@@ -2036,12 +2046,6 @@
ATMEL_US_PAR | ATMEL_US_USMODE);
baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
- quot = uart_get_divisor(port, baud);
-
- if (quot > 65535) { /* BRGR is 16-bit, so switch to slower clock */
- quot /= 8;
- mode |= ATMEL_US_USCLKS_MCK_DIV8;
- }
/* byte size */
switch (termios->c_cflag & CSIZE) {
@@ -2160,7 +2164,31 @@
atmel_uart_writel(port, ATMEL_US_CR, rts_state);
}
- /* set the baud rate */
+ /*
+ * Set the baud rate:
+ * Fractional baudrate allows to setup output frequency more
+ * accurately. This feature is enabled only when using normal mode.
+ * baudrate = selected clock / (8 * (2 - OVER) * (CD + FP / 8))
+ * Currently, OVER is always set to 0 so we get
+ * baudrate = selected clock / (16 * (CD + FP / 8))
+ * then
+ * 8 CD + FP = selected clock / (2 * baudrate)
+ */
+ if (atmel_port->has_frac_baudrate &&
+ (mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_NORMAL) {
+ div = DIV_ROUND_CLOSEST(port->uartclk, baud * 2);
+ cd = div >> 3;
+ fp = div & ATMEL_US_FP_MASK;
+ } else {
+ cd = uart_get_divisor(port, baud);
+ }
+
+ if (cd > 65535) { /* BRGR is 16-bit, so switch to slower clock */
+ cd /= 8;
+ mode |= ATMEL_US_USCLKS_MCK_DIV8;
+ }
+ quot = cd | fp << ATMEL_US_FP_OFFSET;
+
atmel_uart_writel(port, ATMEL_US_BRGR, quot);
atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_RSTSTA | ATMEL_US_RSTRX);
atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN | ATMEL_US_RXEN);
@@ -2292,7 +2320,7 @@
}
#endif
-static struct uart_ops atmel_pops = {
+static const struct uart_ops atmel_pops = {
.tx_empty = atmel_tx_empty,
.set_mctrl = atmel_set_mctrl,
.get_mctrl = atmel_get_mctrl,
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 5108fab..583c9a0 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -631,7 +631,7 @@
}
/* serial core callbacks */
-static struct uart_ops bcm_uart_ops = {
+static const struct uart_ops bcm_uart_ops = {
.tx_empty = bcm_uart_tx_empty,
.get_mctrl = bcm_uart_get_mctrl,
.set_mctrl = bcm_uart_set_mctrl,
diff --git a/drivers/tty/serial/earlycon-arm-semihost.c b/drivers/tty/serial/earlycon-arm-semihost.c
index 383db10..6bbeb69 100644
--- a/drivers/tty/serial/earlycon-arm-semihost.c
+++ b/drivers/tty/serial/earlycon-arm-semihost.c
@@ -53,7 +53,8 @@
uart_console_write(&dev->port, s, n, smh_putc);
}
-int __init early_smh_setup(struct earlycon_device *device, const char *opt)
+static int
+__init early_smh_setup(struct earlycon_device *device, const char *opt)
{
device->con->write = smh_write;
return 0;
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index 067783f..c365154 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -21,6 +21,7 @@
#include <linux/sizes.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/acpi.h>
#ifdef CONFIG_FIX_EARLYCON_MEM
#include <asm/fixmap.h>
@@ -38,7 +39,7 @@
.con = &early_con,
};
-static void __iomem * __init earlycon_map(unsigned long paddr, size_t size)
+static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size)
{
void __iomem *base;
#ifdef CONFIG_FIX_EARLYCON_MEM
@@ -49,8 +50,7 @@
base = ioremap(paddr, size);
#endif
if (!base)
- pr_err("%s: Couldn't map 0x%llx\n", __func__,
- (unsigned long long)paddr);
+ pr_err("%s: Couldn't map %pa\n", __func__, &paddr);
return base;
}
@@ -92,7 +92,7 @@
{
struct uart_port *port = &device->port;
int length;
- unsigned long addr;
+ resource_size_t addr;
if (uart_parse_earlycon(options, &port->iotype, &addr, &options))
return -EINVAL;
@@ -199,6 +199,14 @@
return -ENOENT;
}
+/*
+ * When CONFIG_ACPI_SPCR_TABLE is defined, "earlycon" without parameters in
+ * command line does not start DT earlycon immediately, instead it defers
+ * starting it until DT/ACPI decision is made. At that time if ACPI is enabled
+ * call parse_spcr(), else call early_init_dt_scan_chosen_stdout()
+ */
+bool earlycon_init_is_deferred __initdata;
+
/* early_param wrapper for setup_earlycon() */
static int __init param_setup_earlycon(char *buf)
{
@@ -208,8 +216,14 @@
* Just 'earlycon' is a valid param for devicetree earlycons;
* don't generate a warning from parse_early_params() in that case
*/
- if (!buf || !buf[0])
- return 0;
+ if (!buf || !buf[0]) {
+ if (IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) {
+ earlycon_init_is_deferred = true;
+ return 0;
+ } else {
+ return early_init_dt_scan_chosen_stdout();
+ }
+ }
err = setup_earlycon(buf);
if (err == -ENOENT || err == -EALREADY)
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 7f95f78..de9d510 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -224,7 +224,8 @@
#define UARTWATER_TXWATER_OFF 0
#define UARTWATER_RXWATER_OFF 16
-#define FSL_UART_RX_DMA_BUFFER_SIZE 64
+/* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
+#define DMA_RX_TIMEOUT (10)
#define DRIVER_NAME "fsl-lpuart"
#define DEV_NAME "ttyLP"
@@ -243,18 +244,18 @@
struct dma_chan *dma_rx_chan;
struct dma_async_tx_descriptor *dma_tx_desc;
struct dma_async_tx_descriptor *dma_rx_desc;
- dma_addr_t dma_tx_buf_bus;
- dma_addr_t dma_rx_buf_bus;
dma_cookie_t dma_tx_cookie;
dma_cookie_t dma_rx_cookie;
- unsigned char *dma_tx_buf_virt;
- unsigned char *dma_rx_buf_virt;
unsigned int dma_tx_bytes;
unsigned int dma_rx_bytes;
- int dma_tx_in_progress;
- int dma_rx_in_progress;
+ bool dma_tx_in_progress;
unsigned int dma_rx_timeout;
struct timer_list lpuart_timer;
+ struct scatterlist rx_sgl, tx_sgl[2];
+ struct circ_buf rx_ring;
+ int rx_dma_rng_buf_len;
+ unsigned int dma_tx_nents;
+ wait_queue_head_t dma_wait;
};
static const struct of_device_id lpuart_dt_ids[] = {
@@ -270,7 +271,6 @@
/* Forward declare this for the dma callbacks*/
static void lpuart_dma_tx_complete(void *arg);
-static void lpuart_dma_rx_complete(void *arg);
static u32 lpuart32_read(void __iomem *addr)
{
@@ -316,217 +316,121 @@
lpuart32_write(temp & ~UARTCTRL_RE, port->membase + UARTCTRL);
}
-static void lpuart_copy_rx_to_tty(struct lpuart_port *sport,
- struct tty_port *tty, int count)
+static void lpuart_dma_tx(struct lpuart_port *sport)
{
- int copied;
+ struct circ_buf *xmit = &sport->port.state->xmit;
+ struct scatterlist *sgl = sport->tx_sgl;
+ struct device *dev = sport->port.dev;
+ int ret;
- sport->port.icount.rx += count;
+ if (sport->dma_tx_in_progress)
+ return;
- if (!tty) {
- dev_err(sport->port.dev, "No tty port\n");
+ sport->dma_tx_bytes = uart_circ_chars_pending(xmit);
+
+ if (xmit->tail < xmit->head) {
+ sport->dma_tx_nents = 1;
+ sg_init_one(sgl, xmit->buf + xmit->tail, sport->dma_tx_bytes);
+ } else {
+ sport->dma_tx_nents = 2;
+ sg_init_table(sgl, 2);
+ sg_set_buf(sgl, xmit->buf + xmit->tail,
+ UART_XMIT_SIZE - xmit->tail);
+ sg_set_buf(sgl + 1, xmit->buf, xmit->head);
+ }
+
+ ret = dma_map_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+ if (!ret) {
+ dev_err(dev, "DMA mapping error for TX.\n");
return;
}
- dma_sync_single_for_cpu(sport->port.dev, sport->dma_rx_buf_bus,
- FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
- copied = tty_insert_flip_string(tty,
- ((unsigned char *)(sport->dma_rx_buf_virt)), count);
-
- if (copied != count) {
- WARN_ON(1);
- dev_err(sport->port.dev, "RxData copy to tty layer failed\n");
- }
-
- dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
- FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-}
-
-static void lpuart_pio_tx(struct lpuart_port *sport)
-{
- struct circ_buf *xmit = &sport->port.state->xmit;
- unsigned long flags;
-
- spin_lock_irqsave(&sport->port.lock, flags);
-
- while (!uart_circ_empty(xmit) &&
- readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size) {
- writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR);
- xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
- sport->port.icount.tx++;
- }
-
- if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
- uart_write_wakeup(&sport->port);
-
- if (uart_circ_empty(xmit))
- writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
- sport->port.membase + UARTCR5);
-
- spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static int lpuart_dma_tx(struct lpuart_port *sport, unsigned long count)
-{
- struct circ_buf *xmit = &sport->port.state->xmit;
- dma_addr_t tx_bus_addr;
-
- dma_sync_single_for_device(sport->port.dev, sport->dma_tx_buf_bus,
- UART_XMIT_SIZE, DMA_TO_DEVICE);
- sport->dma_tx_bytes = count & ~(sport->txfifo_size - 1);
- tx_bus_addr = sport->dma_tx_buf_bus + xmit->tail;
- sport->dma_tx_desc = dmaengine_prep_slave_single(sport->dma_tx_chan,
- tx_bus_addr, sport->dma_tx_bytes,
+ sport->dma_tx_desc = dmaengine_prep_slave_sg(sport->dma_tx_chan, sgl,
+ sport->dma_tx_nents,
DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
-
if (!sport->dma_tx_desc) {
- dev_err(sport->port.dev, "Not able to get desc for tx\n");
- return -EIO;
+ dma_unmap_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+ dev_err(dev, "Cannot prepare TX slave DMA!\n");
+ return;
}
sport->dma_tx_desc->callback = lpuart_dma_tx_complete;
sport->dma_tx_desc->callback_param = sport;
- sport->dma_tx_in_progress = 1;
+ sport->dma_tx_in_progress = true;
sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc);
dma_async_issue_pending(sport->dma_tx_chan);
- return 0;
-}
-
-static void lpuart_prepare_tx(struct lpuart_port *sport)
-{
- struct circ_buf *xmit = &sport->port.state->xmit;
- unsigned long count = CIRC_CNT_TO_END(xmit->head,
- xmit->tail, UART_XMIT_SIZE);
-
- if (!count)
- return;
-
- if (count < sport->txfifo_size)
- writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_TDMAS,
- sport->port.membase + UARTCR5);
- else {
- writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
- sport->port.membase + UARTCR5);
- lpuart_dma_tx(sport, count);
- }
}
static void lpuart_dma_tx_complete(void *arg)
{
struct lpuart_port *sport = arg;
+ struct scatterlist *sgl = &sport->tx_sgl[0];
struct circ_buf *xmit = &sport->port.state->xmit;
unsigned long flags;
- async_tx_ack(sport->dma_tx_desc);
-
spin_lock_irqsave(&sport->port.lock, flags);
+ dma_unmap_sg(sport->port.dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+
xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1);
- sport->dma_tx_in_progress = 0;
+
+ sport->port.icount.tx += sport->dma_tx_bytes;
+ sport->dma_tx_in_progress = false;
+ spin_unlock_irqrestore(&sport->port.lock, flags);
if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
uart_write_wakeup(&sport->port);
- lpuart_prepare_tx(sport);
+ if (waitqueue_active(&sport->dma_wait)) {
+ wake_up(&sport->dma_wait);
+ return;
+ }
+
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+ if (!uart_circ_empty(xmit) && !uart_tx_stopped(&sport->port))
+ lpuart_dma_tx(sport);
spin_unlock_irqrestore(&sport->port.lock, flags);
}
-static int lpuart_dma_rx(struct lpuart_port *sport)
+static int lpuart_dma_tx_request(struct uart_port *port)
{
- dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
- FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
- sport->dma_rx_desc = dmaengine_prep_slave_single(sport->dma_rx_chan,
- sport->dma_rx_buf_bus, FSL_UART_RX_DMA_BUFFER_SIZE,
- DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+ struct lpuart_port *sport = container_of(port,
+ struct lpuart_port, port);
+ struct dma_slave_config dma_tx_sconfig = {};
+ int ret;
- if (!sport->dma_rx_desc) {
- dev_err(sport->port.dev, "Not able to get desc for rx\n");
- return -EIO;
+ dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
+ dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+ dma_tx_sconfig.dst_maxburst = 1;
+ dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
+ ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
+
+ if (ret) {
+ dev_err(sport->port.dev,
+ "DMA slave config failed, err = %d\n", ret);
+ return ret;
}
- sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
- sport->dma_rx_desc->callback_param = sport;
- sport->dma_rx_in_progress = 1;
- sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
- dma_async_issue_pending(sport->dma_rx_chan);
-
return 0;
}
static void lpuart_flush_buffer(struct uart_port *port)
{
struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
+
if (sport->lpuart_dma_tx_use) {
+ if (sport->dma_tx_in_progress) {
+ dma_unmap_sg(sport->port.dev, &sport->tx_sgl[0],
+ sport->dma_tx_nents, DMA_TO_DEVICE);
+ sport->dma_tx_in_progress = false;
+ }
dmaengine_terminate_all(sport->dma_tx_chan);
- sport->dma_tx_in_progress = 0;
}
}
-static void lpuart_dma_rx_complete(void *arg)
-{
- struct lpuart_port *sport = arg;
- struct tty_port *port = &sport->port.state->port;
- unsigned long flags;
-
- async_tx_ack(sport->dma_rx_desc);
- mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
-
- spin_lock_irqsave(&sport->port.lock, flags);
-
- sport->dma_rx_in_progress = 0;
- lpuart_copy_rx_to_tty(sport, port, FSL_UART_RX_DMA_BUFFER_SIZE);
- tty_flip_buffer_push(port);
- lpuart_dma_rx(sport);
-
- spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static void lpuart_timer_func(unsigned long data)
-{
- struct lpuart_port *sport = (struct lpuart_port *)data;
- struct tty_port *port = &sport->port.state->port;
- struct dma_tx_state state;
- unsigned long flags;
- unsigned char temp;
- int count;
-
- del_timer(&sport->lpuart_timer);
- dmaengine_pause(sport->dma_rx_chan);
- dmaengine_tx_status(sport->dma_rx_chan, sport->dma_rx_cookie, &state);
- dmaengine_terminate_all(sport->dma_rx_chan);
- count = FSL_UART_RX_DMA_BUFFER_SIZE - state.residue;
- async_tx_ack(sport->dma_rx_desc);
-
- spin_lock_irqsave(&sport->port.lock, flags);
-
- sport->dma_rx_in_progress = 0;
- lpuart_copy_rx_to_tty(sport, port, count);
- tty_flip_buffer_push(port);
- temp = readb(sport->port.membase + UARTCR5);
- writeb(temp & ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
- spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static inline void lpuart_prepare_rx(struct lpuart_port *sport)
-{
- unsigned long flags;
- unsigned char temp;
-
- spin_lock_irqsave(&sport->port.lock, flags);
-
- sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
- add_timer(&sport->lpuart_timer);
-
- lpuart_dma_rx(sport);
- temp = readb(sport->port.membase + UARTCR5);
- writeb(temp | UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
- spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
static inline void lpuart_transmit_buffer(struct lpuart_port *sport)
{
struct circ_buf *xmit = &sport->port.state->xmit;
@@ -580,8 +484,8 @@
writeb(temp | UARTCR2_TIE, port->membase + UARTCR2);
if (sport->lpuart_dma_tx_use) {
- if (!uart_circ_empty(xmit) && !sport->dma_tx_in_progress)
- lpuart_prepare_tx(sport);
+ if (!uart_circ_empty(xmit) && !uart_tx_stopped(port))
+ lpuart_dma_tx(sport);
} else {
if (readb(port->membase + UARTSR1) & UARTSR1_TDRE)
lpuart_transmit_buffer(sport);
@@ -600,6 +504,29 @@
lpuart32_transmit_buffer(sport);
}
+/* return TIOCSER_TEMT when transmitter is not busy */
+static unsigned int lpuart_tx_empty(struct uart_port *port)
+{
+ struct lpuart_port *sport = container_of(port,
+ struct lpuart_port, port);
+ unsigned char sr1 = readb(port->membase + UARTSR1);
+ unsigned char sfifo = readb(port->membase + UARTSFIFO);
+
+ if (sport->dma_tx_in_progress)
+ return 0;
+
+ if (sr1 & UARTSR1_TC && sfifo & UARTSFIFO_TXEMPT)
+ return TIOCSER_TEMT;
+
+ return 0;
+}
+
+static unsigned int lpuart32_tx_empty(struct uart_port *port)
+{
+ return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
+ TIOCSER_TEMT : 0;
+}
+
static irqreturn_t lpuart_txint(int irq, void *dev_id)
{
struct lpuart_port *sport = dev_id;
@@ -766,23 +693,15 @@
static irqreturn_t lpuart_int(int irq, void *dev_id)
{
struct lpuart_port *sport = dev_id;
- unsigned char sts, crdma;
+ unsigned char sts;
sts = readb(sport->port.membase + UARTSR1);
- crdma = readb(sport->port.membase + UARTCR5);
- if (sts & UARTSR1_RDRF && !(crdma & UARTCR5_RDMAS)) {
- if (sport->lpuart_dma_rx_use)
- lpuart_prepare_rx(sport);
- else
- lpuart_rxint(irq, dev_id);
- }
- if (sts & UARTSR1_TDRE && !(crdma & UARTCR5_TDMAS)) {
- if (sport->lpuart_dma_tx_use)
- lpuart_pio_tx(sport);
- else
- lpuart_txint(irq, dev_id);
- }
+ if (sts & UARTSR1_RDRF)
+ lpuart_rxint(irq, dev_id);
+
+ if (sts & UARTSR1_TDRE)
+ lpuart_txint(irq, dev_id);
return IRQ_HANDLED;
}
@@ -807,17 +726,241 @@
return IRQ_HANDLED;
}
-/* return TIOCSER_TEMT when transmitter is not busy */
-static unsigned int lpuart_tx_empty(struct uart_port *port)
+static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
{
- return (readb(port->membase + UARTSR1) & UARTSR1_TC) ?
- TIOCSER_TEMT : 0;
+ struct tty_port *port = &sport->port.state->port;
+ struct dma_tx_state state;
+ enum dma_status dmastat;
+ struct circ_buf *ring = &sport->rx_ring;
+ unsigned long flags;
+ int count = 0;
+ unsigned char sr;
+
+ sr = readb(sport->port.membase + UARTSR1);
+
+ if (sr & (UARTSR1_PE | UARTSR1_FE)) {
+ /* Read DR to clear the error flags */
+ readb(sport->port.membase + UARTDR);
+
+ if (sr & UARTSR1_PE)
+ sport->port.icount.parity++;
+ else if (sr & UARTSR1_FE)
+ sport->port.icount.frame++;
+ }
+
+ async_tx_ack(sport->dma_rx_desc);
+
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+ dmastat = dmaengine_tx_status(sport->dma_rx_chan,
+ sport->dma_rx_cookie,
+ &state);
+
+ if (dmastat == DMA_ERROR) {
+ dev_err(sport->port.dev, "Rx DMA transfer failed!\n");
+ spin_unlock_irqrestore(&sport->port.lock, flags);
+ return;
+ }
+
+ /* CPU claims ownership of RX DMA buffer */
+ dma_sync_sg_for_cpu(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+ /*
+ * ring->head points to the end of data already written by the DMA.
+ * ring->tail points to the beginning of data to be read by the
+ * framework.
+ * The current transfer size should not be larger than the dma buffer
+ * length.
+ */
+ ring->head = sport->rx_sgl.length - state.residue;
+ BUG_ON(ring->head > sport->rx_sgl.length);
+ /*
+ * At this point ring->head may point to the first byte right after the
+ * last byte of the dma buffer:
+ * 0 <= ring->head <= sport->rx_sgl.length
+ *
+ * However ring->tail must always points inside the dma buffer:
+ * 0 <= ring->tail <= sport->rx_sgl.length - 1
+ *
+ * Since we use a ring buffer, we have to handle the case
+ * where head is lower than tail. In such a case, we first read from
+ * tail to the end of the buffer then reset tail.
+ */
+ if (ring->head < ring->tail) {
+ count = sport->rx_sgl.length - ring->tail;
+
+ tty_insert_flip_string(port, ring->buf + ring->tail, count);
+ ring->tail = 0;
+ sport->port.icount.rx += count;
+ }
+
+ /* Finally we read data from tail to head */
+ if (ring->tail < ring->head) {
+ count = ring->head - ring->tail;
+ tty_insert_flip_string(port, ring->buf + ring->tail, count);
+ /* Wrap ring->head if needed */
+ if (ring->head >= sport->rx_sgl.length)
+ ring->head = 0;
+ ring->tail = ring->head;
+ sport->port.icount.rx += count;
+ }
+
+ dma_sync_sg_for_device(sport->port.dev, &sport->rx_sgl, 1,
+ DMA_FROM_DEVICE);
+
+ spin_unlock_irqrestore(&sport->port.lock, flags);
+
+ tty_flip_buffer_push(port);
+ mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
}
-static unsigned int lpuart32_tx_empty(struct uart_port *port)
+static void lpuart_dma_rx_complete(void *arg)
{
- return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
- TIOCSER_TEMT : 0;
+ struct lpuart_port *sport = arg;
+
+ lpuart_copy_rx_to_tty(sport);
+}
+
+static void lpuart_timer_func(unsigned long data)
+{
+ struct lpuart_port *sport = (struct lpuart_port *)data;
+
+ lpuart_copy_rx_to_tty(sport);
+}
+
+static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
+{
+ struct dma_slave_config dma_rx_sconfig = {};
+ struct circ_buf *ring = &sport->rx_ring;
+ int ret, nent;
+ int bits, baud;
+ struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port);
+ struct ktermios *termios = &tty->termios;
+
+ baud = tty_get_baud_rate(tty);
+
+ bits = (termios->c_cflag & CSIZE) == CS7 ? 9 : 10;
+ if (termios->c_cflag & PARENB)
+ bits++;
+
+ /*
+ * Calculate length of one DMA buffer size to keep latency below
+ * 10ms at any baud rate.
+ */
+ sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud / bits / 1000) * 2;
+ sport->rx_dma_rng_buf_len = (1 << (fls(sport->rx_dma_rng_buf_len) - 1));
+ if (sport->rx_dma_rng_buf_len < 16)
+ sport->rx_dma_rng_buf_len = 16;
+
+ ring->buf = kmalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
+ if (!ring->buf) {
+ dev_err(sport->port.dev, "Ring buf alloc failed\n");
+ return -ENOMEM;
+ }
+
+ sg_init_one(&sport->rx_sgl, ring->buf, sport->rx_dma_rng_buf_len);
+ sg_set_buf(&sport->rx_sgl, ring->buf, sport->rx_dma_rng_buf_len);
+ nent = dma_map_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+ if (!nent) {
+ dev_err(sport->port.dev, "DMA Rx mapping error\n");
+ return -EINVAL;
+ }
+
+ dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
+ dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+ dma_rx_sconfig.src_maxburst = 1;
+ dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
+ ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
+
+ if (ret < 0) {
+ dev_err(sport->port.dev,
+ "DMA Rx slave config failed, err = %d\n", ret);
+ return ret;
+ }
+
+ sport->dma_rx_desc = dmaengine_prep_dma_cyclic(sport->dma_rx_chan,
+ sg_dma_address(&sport->rx_sgl),
+ sport->rx_sgl.length,
+ sport->rx_sgl.length / 2,
+ DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT);
+ if (!sport->dma_rx_desc) {
+ dev_err(sport->port.dev, "Cannot prepare cyclic DMA\n");
+ return -EFAULT;
+ }
+
+ sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
+ sport->dma_rx_desc->callback_param = sport;
+ sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
+ dma_async_issue_pending(sport->dma_rx_chan);
+
+ writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_RDMAS,
+ sport->port.membase + UARTCR5);
+
+ return 0;
+}
+
+static void lpuart_dma_rx_free(struct uart_port *port)
+{
+ struct lpuart_port *sport = container_of(port,
+ struct lpuart_port, port);
+
+ if (sport->dma_rx_chan)
+ dmaengine_terminate_all(sport->dma_rx_chan);
+
+ dma_unmap_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+ kfree(sport->rx_ring.buf);
+ sport->rx_ring.tail = 0;
+ sport->rx_ring.head = 0;
+ sport->dma_rx_desc = NULL;
+ sport->dma_rx_cookie = -EINVAL;
+}
+
+static int lpuart_config_rs485(struct uart_port *port,
+ struct serial_rs485 *rs485)
+{
+ struct lpuart_port *sport = container_of(port,
+ struct lpuart_port, port);
+
+ u8 modem = readb(sport->port.membase + UARTMODEM) &
+ ~(UARTMODEM_TXRTSPOL | UARTMODEM_TXRTSE);
+ writeb(modem, sport->port.membase + UARTMODEM);
+
+ if (rs485->flags & SER_RS485_ENABLED) {
+ /* Enable auto RS-485 RTS mode */
+ modem |= UARTMODEM_TXRTSE;
+
+ /*
+ * RTS needs to be logic HIGH either during transer _or_ after
+ * transfer, other variants are not supported by the hardware.
+ */
+
+ if (!(rs485->flags & (SER_RS485_RTS_ON_SEND |
+ SER_RS485_RTS_AFTER_SEND)))
+ rs485->flags |= SER_RS485_RTS_ON_SEND;
+
+ if (rs485->flags & SER_RS485_RTS_ON_SEND &&
+ rs485->flags & SER_RS485_RTS_AFTER_SEND)
+ rs485->flags &= ~SER_RS485_RTS_AFTER_SEND;
+
+ /*
+ * The hardware defaults to RTS logic HIGH while transfer.
+ * Switch polarity in case RTS shall be logic HIGH
+ * after transfer.
+ * Note: UART is assumed to be active high.
+ */
+ if (rs485->flags & SER_RS485_RTS_ON_SEND)
+ modem &= ~UARTMODEM_TXRTSPOL;
+ else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
+ modem |= UARTMODEM_TXRTSPOL;
+ }
+
+ /* Store the new configuration */
+ sport->port.rs485 = *rs485;
+
+ writeb(modem, sport->port.membase + UARTMODEM);
+ return 0;
}
static unsigned int lpuart_get_mctrl(struct uart_port *port)
@@ -853,17 +996,22 @@
static void lpuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
unsigned char temp;
+ struct lpuart_port *sport = container_of(port,
+ struct lpuart_port, port);
- temp = readb(port->membase + UARTMODEM) &
+ /* Make sure RXRTSE bit is not set when RS485 is enabled */
+ if (!(sport->port.rs485.flags & SER_RS485_ENABLED)) {
+ temp = readb(sport->port.membase + UARTMODEM) &
~(UARTMODEM_RXRTSE | UARTMODEM_TXCTSE);
- if (mctrl & TIOCM_RTS)
- temp |= UARTMODEM_RXRTSE;
+ if (mctrl & TIOCM_RTS)
+ temp |= UARTMODEM_RXRTSE;
- if (mctrl & TIOCM_CTS)
- temp |= UARTMODEM_TXCTSE;
+ if (mctrl & TIOCM_CTS)
+ temp |= UARTMODEM_TXCTSE;
- writeb(temp, port->membase + UARTMODEM);
+ writeb(temp, port->membase + UARTMODEM);
+ }
}
static void lpuart32_set_mctrl(struct uart_port *port, unsigned int mctrl)
@@ -921,13 +1069,16 @@
writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE,
sport->port.membase + UARTPFIFO);
- /* explicitly clear RDRF */
- readb(sport->port.membase + UARTSR1);
-
/* flush Tx and Rx FIFO */
writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH,
sport->port.membase + UARTCFIFO);
+ /* explicitly clear RDRF */
+ if (readb(sport->port.membase + UARTSR1) & UARTSR1_RDRF) {
+ readb(sport->port.membase + UARTDR);
+ writeb(UARTSFIFO_RXUF, sport->port.membase + UARTSFIFO);
+ }
+
writeb(0, sport->port.membase + UARTTWFIFO);
writeb(1, sport->port.membase + UARTRWFIFO);
@@ -960,110 +1111,12 @@
lpuart32_write(ctrl_saved, sport->port.membase + UARTCTRL);
}
-static int lpuart_dma_tx_request(struct uart_port *port)
+static void rx_dma_timer_init(struct lpuart_port *sport)
{
- struct lpuart_port *sport = container_of(port,
- struct lpuart_port, port);
- struct dma_slave_config dma_tx_sconfig;
- dma_addr_t dma_bus;
- unsigned char *dma_buf;
- int ret;
-
- dma_bus = dma_map_single(sport->dma_tx_chan->device->dev,
- sport->port.state->xmit.buf,
- UART_XMIT_SIZE, DMA_TO_DEVICE);
-
- if (dma_mapping_error(sport->dma_tx_chan->device->dev, dma_bus)) {
- dev_err(sport->port.dev, "dma_map_single tx failed\n");
- return -ENOMEM;
- }
-
- dma_buf = sport->port.state->xmit.buf;
- dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
- dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
- dma_tx_sconfig.dst_maxburst = sport->txfifo_size;
- dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
- ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
-
- if (ret < 0) {
- dev_err(sport->port.dev,
- "Dma slave config failed, err = %d\n", ret);
- return ret;
- }
-
- sport->dma_tx_buf_virt = dma_buf;
- sport->dma_tx_buf_bus = dma_bus;
- sport->dma_tx_in_progress = 0;
-
- return 0;
-}
-
-static int lpuart_dma_rx_request(struct uart_port *port)
-{
- struct lpuart_port *sport = container_of(port,
- struct lpuart_port, port);
- struct dma_slave_config dma_rx_sconfig;
- dma_addr_t dma_bus;
- unsigned char *dma_buf;
- int ret;
-
- dma_buf = devm_kzalloc(sport->port.dev,
- FSL_UART_RX_DMA_BUFFER_SIZE, GFP_KERNEL);
-
- if (!dma_buf) {
- dev_err(sport->port.dev, "Dma rx alloc failed\n");
- return -ENOMEM;
- }
-
- dma_bus = dma_map_single(sport->dma_rx_chan->device->dev, dma_buf,
- FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
- if (dma_mapping_error(sport->dma_rx_chan->device->dev, dma_bus)) {
- dev_err(sport->port.dev, "dma_map_single rx failed\n");
- return -ENOMEM;
- }
-
- dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
- dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
- dma_rx_sconfig.src_maxburst = 1;
- dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
- ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
-
- if (ret < 0) {
- dev_err(sport->port.dev,
- "Dma slave config failed, err = %d\n", ret);
- return ret;
- }
-
- sport->dma_rx_buf_virt = dma_buf;
- sport->dma_rx_buf_bus = dma_bus;
- sport->dma_rx_in_progress = 0;
-
- return 0;
-}
-
-static void lpuart_dma_tx_free(struct uart_port *port)
-{
- struct lpuart_port *sport = container_of(port,
- struct lpuart_port, port);
-
- dma_unmap_single(sport->port.dev, sport->dma_tx_buf_bus,
- UART_XMIT_SIZE, DMA_TO_DEVICE);
-
- sport->dma_tx_buf_bus = 0;
- sport->dma_tx_buf_virt = NULL;
-}
-
-static void lpuart_dma_rx_free(struct uart_port *port)
-{
- struct lpuart_port *sport = container_of(port,
- struct lpuart_port, port);
-
- dma_unmap_single(sport->port.dev, sport->dma_rx_buf_bus,
- FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
- sport->dma_rx_buf_bus = 0;
- sport->dma_rx_buf_virt = NULL;
+ setup_timer(&sport->lpuart_timer, lpuart_timer_func,
+ (unsigned long)sport);
+ sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
+ add_timer(&sport->lpuart_timer);
}
static int lpuart_startup(struct uart_port *port)
@@ -1084,22 +1137,6 @@
sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) &
UARTPFIFO_FIFOSIZE_MASK) + 1);
- if (sport->dma_rx_chan && !lpuart_dma_rx_request(port)) {
- sport->lpuart_dma_rx_use = true;
- setup_timer(&sport->lpuart_timer, lpuart_timer_func,
- (unsigned long)sport);
- } else
- sport->lpuart_dma_rx_use = false;
-
-
- if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
- sport->lpuart_dma_tx_use = true;
- temp = readb(port->membase + UARTCR5);
- temp &= ~UARTCR5_RDMAS;
- writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
- } else
- sport->lpuart_dma_tx_use = false;
-
ret = devm_request_irq(port->dev, port->irq, lpuart_int, 0,
DRIVER_NAME, sport);
if (ret)
@@ -1113,7 +1150,29 @@
temp |= (UARTCR2_RIE | UARTCR2_TIE | UARTCR2_RE | UARTCR2_TE);
writeb(temp, sport->port.membase + UARTCR2);
+ if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+ /* set Rx DMA timeout */
+ sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
+ if (!sport->dma_rx_timeout)
+ sport->dma_rx_timeout = 1;
+
+ sport->lpuart_dma_rx_use = true;
+ rx_dma_timer_init(sport);
+ } else {
+ sport->lpuart_dma_rx_use = false;
+ }
+
+ if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
+ init_waitqueue_head(&sport->dma_wait);
+ sport->lpuart_dma_tx_use = true;
+ temp = readb(port->membase + UARTCR5);
+ writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
+ } else {
+ sport->lpuart_dma_tx_use = false;
+ }
+
spin_unlock_irqrestore(&sport->port.lock, flags);
+
return 0;
}
@@ -1170,12 +1229,19 @@
devm_free_irq(port->dev, port->irq, sport);
if (sport->lpuart_dma_rx_use) {
- lpuart_dma_rx_free(&sport->port);
del_timer_sync(&sport->lpuart_timer);
+ lpuart_dma_rx_free(&sport->port);
}
- if (sport->lpuart_dma_tx_use)
- lpuart_dma_tx_free(&sport->port);
+ if (sport->lpuart_dma_tx_use) {
+ if (wait_event_interruptible(sport->dma_wait,
+ !sport->dma_tx_in_progress) != false) {
+ sport->dma_tx_in_progress = false;
+ dmaengine_terminate_all(sport->dma_tx_chan);
+ }
+
+ lpuart_stop_tx(port);
+ }
}
static void lpuart32_shutdown(struct uart_port *port)
@@ -1203,13 +1269,14 @@
{
struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
unsigned long flags;
- unsigned char cr1, old_cr1, old_cr2, cr4, bdh, modem;
+ unsigned char cr1, old_cr1, old_cr2, cr3, cr4, bdh, modem;
unsigned int baud;
unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
unsigned int sbr, brfa;
cr1 = old_cr1 = readb(sport->port.membase + UARTCR1);
old_cr2 = readb(sport->port.membase + UARTCR2);
+ cr3 = readb(sport->port.membase + UARTCR3);
cr4 = readb(sport->port.membase + UARTCR4);
bdh = readb(sport->port.membase + UARTBDH);
modem = readb(sport->port.membase + UARTMODEM);
@@ -1240,6 +1307,13 @@
cr1 |= UARTCR1_M;
}
+ /*
+ * When auto RS-485 RTS mode is enabled,
+ * hardware flow control need to be disabled.
+ */
+ if (sport->port.rs485.flags & SER_RS485_ENABLED)
+ termios->c_cflag &= ~CRTSCTS;
+
if (termios->c_cflag & CRTSCTS) {
modem |= (UARTMODEM_RXRTSE | UARTMODEM_TXCTSE);
} else {
@@ -1257,7 +1331,10 @@
if ((termios->c_cflag & PARENB)) {
if (termios->c_cflag & CMSPAR) {
cr1 &= ~UARTCR1_PE;
- cr1 |= UARTCR1_M;
+ if (termios->c_cflag & PARODD)
+ cr3 |= UARTCR3_T8;
+ else
+ cr3 &= ~UARTCR3_T8;
} else {
cr1 |= UARTCR1_PE;
if ((termios->c_cflag & CSIZE) == CS8)
@@ -1297,17 +1374,6 @@
/* update the per-port timeout */
uart_update_timeout(port, termios->c_cflag, baud);
- if (sport->lpuart_dma_rx_use) {
- /* Calculate delay for 1.5 DMA buffers */
- sport->dma_rx_timeout = (sport->port.timeout - HZ / 50) *
- FSL_UART_RX_DMA_BUFFER_SIZE * 3 /
- sport->rxfifo_size / 2;
- dev_dbg(port->dev, "DMA Rx t-out %ums, tty t-out %u jiffies\n",
- sport->dma_rx_timeout * 1000 / HZ, sport->port.timeout);
- if (sport->dma_rx_timeout < msecs_to_jiffies(20))
- sport->dma_rx_timeout = msecs_to_jiffies(20);
- }
-
/* wait transmit engin complete */
while (!(readb(sport->port.membase + UARTSR1) & UARTSR1_TC))
barrier();
@@ -1325,12 +1391,31 @@
writeb(cr4 | brfa, sport->port.membase + UARTCR4);
writeb(bdh, sport->port.membase + UARTBDH);
writeb(sbr & 0xFF, sport->port.membase + UARTBDL);
+ writeb(cr3, sport->port.membase + UARTCR3);
writeb(cr1, sport->port.membase + UARTCR1);
writeb(modem, sport->port.membase + UARTMODEM);
/* restore control register */
writeb(old_cr2, sport->port.membase + UARTCR2);
+ /*
+ * If new baud rate is set, we will also need to update the Ring buffer
+ * length according to the selected baud rate and restart Rx DMA path.
+ */
+ if (old) {
+ if (sport->lpuart_dma_rx_use) {
+ del_timer_sync(&sport->lpuart_timer);
+ lpuart_dma_rx_free(&sport->port);
+ }
+
+ if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+ sport->lpuart_dma_rx_use = true;
+ rx_dma_timer_init(sport);
+ } else {
+ sport->lpuart_dma_rx_use = false;
+ }
+ }
+
spin_unlock_irqrestore(&sport->port.lock, flags);
}
@@ -1494,7 +1579,7 @@
return ret;
}
-static struct uart_ops lpuart_pops = {
+static const struct uart_ops lpuart_pops = {
.tx_empty = lpuart_tx_empty,
.set_mctrl = lpuart_set_mctrl,
.get_mctrl = lpuart_get_mctrl,
@@ -1513,7 +1598,7 @@
.flush_buffer = lpuart_flush_buffer,
};
-static struct uart_ops lpuart32_pops = {
+static const struct uart_ops lpuart32_pops = {
.tx_empty = lpuart32_tx_empty,
.set_mctrl = lpuart32_set_mctrl,
.get_mctrl = lpuart32_get_mctrl,
@@ -1843,6 +1928,8 @@
sport->port.ops = &lpuart_pops;
sport->port.flags = UPF_BOOT_AUTOCONF;
+ sport->port.rs485_config = lpuart_config_rs485;
+
sport->clk = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(sport->clk)) {
ret = PTR_ERR(sport->clk);
@@ -1883,6 +1970,12 @@
dev_info(sport->port.dev, "DMA rx channel request failed, "
"operating without rx DMA\n");
+ if (of_property_read_bool(np, "linux,rs485-enabled-at-boot-time")) {
+ sport->port.rs485.flags |= SER_RS485_ENABLED;
+ sport->port.rs485.flags |= SER_RS485_RTS_ON_SEND;
+ writeb(UARTMODEM_TXRTSE, sport->port.membase + UARTMODEM);
+ }
+
return 0;
}
@@ -1923,6 +2016,32 @@
uart_suspend_port(&lpuart_reg, &sport->port);
+ if (sport->lpuart_dma_rx_use) {
+ /*
+ * EDMA driver during suspend will forcefully release any
+ * non-idle DMA channels. If port wakeup is enabled or if port
+ * is console port or 'no_console_suspend' is set the Rx DMA
+ * cannot resume as as expected, hence gracefully release the
+ * Rx DMA path before suspend and start Rx DMA path on resume.
+ */
+ if (sport->port.irq_wake) {
+ del_timer_sync(&sport->lpuart_timer);
+ lpuart_dma_rx_free(&sport->port);
+ }
+
+ /* Disable Rx DMA to use UART port as wakeup source */
+ writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_RDMAS,
+ sport->port.membase + UARTCR5);
+ }
+
+ if (sport->lpuart_dma_tx_use) {
+ sport->dma_tx_in_progress = false;
+ dmaengine_terminate_all(sport->dma_tx_chan);
+ }
+
+ if (sport->port.suspended && !sport->port.irq_wake)
+ clk_disable_unprepare(sport->clk);
+
return 0;
}
@@ -1931,6 +2050,9 @@
struct lpuart_port *sport = dev_get_drvdata(dev);
unsigned long temp;
+ if (sport->port.suspended && !sport->port.irq_wake)
+ clk_prepare_enable(sport->clk);
+
if (sport->lpuart32) {
lpuart32_setup_watermark(sport);
temp = lpuart32_read(sport->port.membase + UARTCTRL);
@@ -1944,6 +2066,26 @@
writeb(temp, sport->port.membase + UARTCR2);
}
+ if (sport->lpuart_dma_rx_use) {
+ if (sport->port.irq_wake) {
+ if (!lpuart_start_rx_dma(sport)) {
+ sport->lpuart_dma_rx_use = true;
+ rx_dma_timer_init(sport);
+ } else {
+ sport->lpuart_dma_rx_use = false;
+ }
+ }
+ }
+
+ if (sport->dma_tx_chan && !lpuart_dma_tx_request(&sport->port)) {
+ init_waitqueue_head(&sport->dma_wait);
+ sport->lpuart_dma_tx_use = true;
+ writeb(readb(sport->port.membase + UARTCR5) |
+ UARTCR5_TDMAS, sport->port.membase + UARTCR5);
+ } else {
+ sport->lpuart_dma_tx_use = false;
+ }
+
uart_resume_port(&lpuart_reg, &sport->port);
return 0;
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 0df2b1c..a70356d 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -190,6 +190,7 @@
enum imx_uart_type {
IMX1_UART,
IMX21_UART,
+ IMX53_UART,
IMX6Q_UART,
};
@@ -222,6 +223,9 @@
struct dma_chan *dma_chan_rx, *dma_chan_tx;
struct scatterlist rx_sgl, tx_sgl[2];
void *rx_buf;
+ struct circ_buf rx_ring;
+ unsigned int rx_periods;
+ dma_cookie_t rx_cookie;
unsigned int tx_bytes;
unsigned int dma_tx_nents;
wait_queue_head_t dma_wait;
@@ -244,6 +248,10 @@
.uts_reg = IMX21_UTS,
.devtype = IMX21_UART,
},
+ [IMX53_UART] = {
+ .uts_reg = IMX21_UTS,
+ .devtype = IMX53_UART,
+ },
[IMX6Q_UART] = {
.uts_reg = IMX21_UTS,
.devtype = IMX6Q_UART,
@@ -258,6 +266,9 @@
.name = "imx21-uart",
.driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX21_UART],
}, {
+ .name = "imx53-uart",
+ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX53_UART],
+ }, {
.name = "imx6q-uart",
.driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX6Q_UART],
}, {
@@ -268,6 +279,7 @@
static const struct of_device_id imx_uart_dt_ids[] = {
{ .compatible = "fsl,imx6q-uart", .data = &imx_uart_devdata[IMX6Q_UART], },
+ { .compatible = "fsl,imx53-uart", .data = &imx_uart_devdata[IMX53_UART], },
{ .compatible = "fsl,imx1-uart", .data = &imx_uart_devdata[IMX1_UART], },
{ .compatible = "fsl,imx21-uart", .data = &imx_uart_devdata[IMX21_UART], },
{ /* sentinel */ }
@@ -289,6 +301,11 @@
return sport->devdata->devtype == IMX21_UART;
}
+static inline int is_imx53_uart(struct imx_port *sport)
+{
+ return sport->devdata->devtype == IMX53_UART;
+}
+
static inline int is_imx6q_uart(struct imx_port *sport)
{
return sport->devdata->devtype == IMX6Q_UART;
@@ -701,6 +718,7 @@
return IRQ_HANDLED;
}
+static void clear_rx_errors(struct imx_port *sport);
static int start_rx_dma(struct imx_port *sport);
/*
* If the RXFIFO is filled with some data, and then we
@@ -726,6 +744,11 @@
temp &= ~(UCR2_ATEN);
writel(temp, sport->port.membase + UCR2);
+ /* disable the rx errors interrupts */
+ temp = readl(sport->port.membase + UCR4);
+ temp &= ~UCR4_OREN;
+ writel(temp, sport->port.membase + UCR4);
+
/* tell the DMA to receive the data. */
start_rx_dma(sport);
}
@@ -740,12 +763,13 @@
{
unsigned int tmp = TIOCM_DSR;
unsigned usr1 = readl(sport->port.membase + USR1);
+ unsigned usr2 = readl(sport->port.membase + USR2);
if (usr1 & USR1_RTSS)
tmp |= TIOCM_CTS;
/* in DCE mode DCDIN is always 0 */
- if (!(usr1 & USR2_DCDIN))
+ if (!(usr2 & USR2_DCDIN))
tmp |= TIOCM_CAR;
if (sport->dte_mode)
@@ -932,30 +956,6 @@
}
#define RX_BUF_SIZE (PAGE_SIZE)
-static void imx_rx_dma_done(struct imx_port *sport)
-{
- unsigned long temp;
- unsigned long flags;
-
- spin_lock_irqsave(&sport->port.lock, flags);
-
- /* re-enable interrupts to get notified when new symbols are incoming */
- temp = readl(sport->port.membase + UCR1);
- temp |= UCR1_RRDYEN;
- writel(temp, sport->port.membase + UCR1);
-
- temp = readl(sport->port.membase + UCR2);
- temp |= UCR2_ATEN;
- writel(temp, sport->port.membase + UCR2);
-
- sport->dma_is_rxing = 0;
-
- /* Is the shutdown waiting for us? */
- if (waitqueue_active(&sport->dma_wait))
- wake_up(&sport->dma_wait);
-
- spin_unlock_irqrestore(&sport->port.lock, flags);
-}
/*
* There are two kinds of RX DMA interrupts(such as in the MX6Q):
@@ -972,43 +972,76 @@
struct scatterlist *sgl = &sport->rx_sgl;
struct tty_port *port = &sport->port.state->port;
struct dma_tx_state state;
+ struct circ_buf *rx_ring = &sport->rx_ring;
enum dma_status status;
- unsigned int count;
-
- /* unmap it first */
- dma_unmap_sg(sport->port.dev, sgl, 1, DMA_FROM_DEVICE);
+ unsigned int w_bytes = 0;
+ unsigned int r_bytes;
+ unsigned int bd_size;
status = dmaengine_tx_status(chan, (dma_cookie_t)0, &state);
- count = RX_BUF_SIZE - state.residue;
- dev_dbg(sport->port.dev, "We get %d bytes.\n", count);
-
- if (count) {
- if (!(sport->port.ignore_status_mask & URXD_DUMMY_READ)) {
- int bytes = tty_insert_flip_string(port, sport->rx_buf,
- count);
-
- if (bytes != count)
- sport->port.icount.buf_overrun++;
- }
- tty_flip_buffer_push(port);
- sport->port.icount.rx += count;
+ if (status == DMA_ERROR) {
+ dev_err(sport->port.dev, "DMA transaction error.\n");
+ clear_rx_errors(sport);
+ return;
}
- /*
- * Restart RX DMA directly if more data is available in order to skip
- * the roundtrip through the IRQ handler. If there is some data already
- * in the FIFO, DMA needs to be restarted soon anyways.
- *
- * Otherwise stop the DMA and reactivate FIFO IRQs to restart DMA once
- * data starts to arrive again.
- */
- if (readl(sport->port.membase + USR2) & USR2_RDR)
- start_rx_dma(sport);
- else
- imx_rx_dma_done(sport);
+ if (!(sport->port.ignore_status_mask & URXD_DUMMY_READ)) {
+
+ /*
+ * The state-residue variable represents the empty space
+ * relative to the entire buffer. Taking this in consideration
+ * the head is always calculated base on the buffer total
+ * length - DMA transaction residue. The UART script from the
+ * SDMA firmware will jump to the next buffer descriptor,
+ * once a DMA transaction if finalized (IMX53 RM - A.4.1.2.4).
+ * Taking this in consideration the tail is always at the
+ * beginning of the buffer descriptor that contains the head.
+ */
+
+ /* Calculate the head */
+ rx_ring->head = sg_dma_len(sgl) - state.residue;
+
+ /* Calculate the tail. */
+ bd_size = sg_dma_len(sgl) / sport->rx_periods;
+ rx_ring->tail = ((rx_ring->head-1) / bd_size) * bd_size;
+
+ if (rx_ring->head <= sg_dma_len(sgl) &&
+ rx_ring->head > rx_ring->tail) {
+
+ /* Move data from tail to head */
+ r_bytes = rx_ring->head - rx_ring->tail;
+
+ /* CPU claims ownership of RX DMA buffer */
+ dma_sync_sg_for_cpu(sport->port.dev, sgl, 1,
+ DMA_FROM_DEVICE);
+
+ w_bytes = tty_insert_flip_string(port,
+ sport->rx_buf + rx_ring->tail, r_bytes);
+
+ /* UART retrieves ownership of RX DMA buffer */
+ dma_sync_sg_for_device(sport->port.dev, sgl, 1,
+ DMA_FROM_DEVICE);
+
+ if (w_bytes != r_bytes)
+ sport->port.icount.buf_overrun++;
+
+ sport->port.icount.rx += w_bytes;
+ } else {
+ WARN_ON(rx_ring->head > sg_dma_len(sgl));
+ WARN_ON(rx_ring->head <= rx_ring->tail);
+ }
+ }
+
+ if (w_bytes) {
+ tty_flip_buffer_push(port);
+ dev_dbg(sport->port.dev, "We get %d bytes.\n", w_bytes);
+ }
}
+/* RX DMA buffer periods */
+#define RX_DMA_PERIODS 4
+
static int start_rx_dma(struct imx_port *sport)
{
struct scatterlist *sgl = &sport->rx_sgl;
@@ -1017,14 +1050,21 @@
struct dma_async_tx_descriptor *desc;
int ret;
+ sport->rx_ring.head = 0;
+ sport->rx_ring.tail = 0;
+ sport->rx_periods = RX_DMA_PERIODS;
+
sg_init_one(sgl, sport->rx_buf, RX_BUF_SIZE);
ret = dma_map_sg(dev, sgl, 1, DMA_FROM_DEVICE);
if (ret == 0) {
dev_err(dev, "DMA mapping error for RX.\n");
return -EINVAL;
}
- desc = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT);
+
+ desc = dmaengine_prep_dma_cyclic(chan, sg_dma_address(sgl),
+ sg_dma_len(sgl), sg_dma_len(sgl) / sport->rx_periods,
+ DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+
if (!desc) {
dma_unmap_sg(dev, sgl, 1, DMA_FROM_DEVICE);
dev_err(dev, "We cannot prepare for the RX slave dma!\n");
@@ -1034,11 +1074,36 @@
desc->callback_param = sport;
dev_dbg(dev, "RX: prepare for the DMA.\n");
- dmaengine_submit(desc);
+ sport->rx_cookie = dmaengine_submit(desc);
dma_async_issue_pending(chan);
return 0;
}
+static void clear_rx_errors(struct imx_port *sport)
+{
+ unsigned int status_usr1, status_usr2;
+
+ status_usr1 = readl(sport->port.membase + USR1);
+ status_usr2 = readl(sport->port.membase + USR2);
+
+ if (status_usr2 & USR2_BRCD) {
+ sport->port.icount.brk++;
+ writel(USR2_BRCD, sport->port.membase + USR2);
+ } else if (status_usr1 & USR1_FRAMERR) {
+ sport->port.icount.frame++;
+ writel(USR1_FRAMERR, sport->port.membase + USR1);
+ } else if (status_usr1 & USR1_PARITYERR) {
+ sport->port.icount.parity++;
+ writel(USR1_PARITYERR, sport->port.membase + USR1);
+ }
+
+ if (status_usr2 & USR2_ORE) {
+ sport->port.icount.overrun++;
+ writel(USR2_ORE, sport->port.membase + USR2);
+ }
+
+}
+
#define TXTL_DEFAULT 2 /* reset default */
#define RXTL_DEFAULT 1 /* reset default */
#define TXTL_DMA 8 /* DMA burst setting */
@@ -1058,14 +1123,16 @@
static void imx_uart_dma_exit(struct imx_port *sport)
{
if (sport->dma_chan_rx) {
+ dmaengine_terminate_sync(sport->dma_chan_rx);
dma_release_channel(sport->dma_chan_rx);
sport->dma_chan_rx = NULL;
-
+ sport->rx_cookie = -EINVAL;
kfree(sport->rx_buf);
sport->rx_buf = NULL;
}
if (sport->dma_chan_tx) {
+ dmaengine_terminate_sync(sport->dma_chan_tx);
dma_release_channel(sport->dma_chan_tx);
sport->dma_chan_tx = NULL;
}
@@ -1103,6 +1170,7 @@
ret = -ENOMEM;
goto err;
}
+ sport->rx_ring.buf = sport->rx_buf;
/* Prepare for TX : */
sport->dma_chan_tx = dma_request_slave_channel(dev, "tx");
@@ -1201,8 +1269,7 @@
writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
/* Can we enable the DMA support? */
- if (is_imx6q_uart(sport) && !uart_console(port) &&
- !sport->dma_is_inited)
+ if (!uart_console(port) && !sport->dma_is_inited)
imx_uart_dma_init(sport);
spin_lock_irqsave(&sport->port.lock, flags);
@@ -1283,17 +1350,11 @@
unsigned long flags;
if (sport->dma_is_enabled) {
- int ret;
+ sport->dma_is_rxing = 0;
+ sport->dma_is_txing = 0;
+ dmaengine_terminate_sync(sport->dma_chan_tx);
+ dmaengine_terminate_sync(sport->dma_chan_rx);
- /* We have to wait for the DMA to finish. */
- ret = wait_event_interruptible(sport->dma_wait,
- !sport->dma_is_rxing && !sport->dma_is_txing);
- if (ret != 0) {
- sport->dma_is_rxing = 0;
- sport->dma_is_txing = 0;
- dmaengine_terminate_all(sport->dma_chan_tx);
- dmaengine_terminate_all(sport->dma_chan_rx);
- }
spin_lock_irqsave(&sport->port.lock, flags);
imx_stop_tx(port);
imx_stop_rx(port);
@@ -1690,7 +1751,7 @@
return 0;
}
-static struct uart_ops imx_pops = {
+static const struct uart_ops imx_pops = {
.tx_empty = imx_tx_empty,
.set_mctrl = imx_set_mctrl,
.get_mctrl = imx_get_mctrl,
@@ -2077,8 +2138,10 @@
/* For register access, we only need to enable the ipg clock. */
ret = clk_prepare_enable(sport->clk_ipg);
- if (ret)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to enable per clk: %d\n", ret);
return ret;
+ }
/* Disable interrupts before requesting them */
reg = readl_relaxed(sport->port.membase + UCR1);
@@ -2095,18 +2158,26 @@
if (txirq > 0) {
ret = devm_request_irq(&pdev->dev, rxirq, imx_rxint, 0,
dev_name(&pdev->dev), sport);
- if (ret)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request rx irq: %d\n",
+ ret);
return ret;
+ }
ret = devm_request_irq(&pdev->dev, txirq, imx_txint, 0,
dev_name(&pdev->dev), sport);
- if (ret)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request tx irq: %d\n",
+ ret);
return ret;
+ }
} else {
ret = devm_request_irq(&pdev->dev, rxirq, imx_int, 0,
dev_name(&pdev->dev), sport);
- if (ret)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request irq: %d\n", ret);
return ret;
+ }
}
imx_ports[sport->port.line] = sport;
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index c5ddfe5..ec7d838 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -346,7 +346,7 @@
port->type = PORT_JSM;
}
-static struct uart_ops jsm_ops = {
+static const struct uart_ops jsm_ops = {
.tx_empty = jsm_tty_tx_empty,
.set_mctrl = jsm_tty_set_mctrl,
.get_mctrl = jsm_tty_get_mctrl,
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index 5c4c280..ace8264 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -712,7 +712,7 @@
dev_dbg(&s->spi->dev, "%s\n", __func__);
}
-static struct uart_ops max3100_ops = {
+static const struct uart_ops max3100_ops = {
.tx_empty = max3100_tx_empty,
.set_mctrl = max3100_set_mctrl,
.get_mctrl = max3100_get_mctrl,
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index 9360801..8a3e926 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1329,9 +1329,9 @@
const struct spi_device_id *id_entry = spi_get_device_id(spi);
devtype = (struct max310x_devtype *)id_entry->driver_data;
- flags = IRQF_TRIGGER_FALLING;
}
+ flags = IRQF_TRIGGER_FALLING;
regcfg.max_register = devtype->nr * 0x20 - 1;
regmap = devm_regmap_init_spi(spi, ®cfg);
diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c
index a44290e..e72ea61 100644
--- a/drivers/tty/serial/men_z135_uart.c
+++ b/drivers/tty/serial/men_z135_uart.c
@@ -775,7 +775,7 @@
return -EINVAL;
}
-static struct uart_ops men_z135_ops = {
+static const struct uart_ops men_z135_ops = {
.tx_empty = men_z135_tx_empty,
.set_mctrl = men_z135_set_mctrl,
.get_mctrl = men_z135_get_mctrl,
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index eb54e5c..770454e 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1317,7 +1317,7 @@
mxs_clr(AUART_LINECTRL_BRK, s, REG_LINECTRL);
}
-static struct uart_ops mxs_auart_ops = {
+static const struct uart_ops mxs_auart_ops = {
.tx_empty = mxs_auart_tx_empty,
.start_tx = mxs_auart_start_tx,
.stop_tx = mxs_auart_stop_tx,
@@ -1510,10 +1510,7 @@
if (!is_asm9260_auart(s)) {
s->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(s->clk))
- return PTR_ERR(s->clk);
-
- return 0;
+ return PTR_ERR_OR_ZERO(s->clk);
}
s->clk = devm_clk_get(s->dev, "mod");
@@ -1537,16 +1534,20 @@
err = clk_set_rate(s->clk, clk_get_rate(s->clk_ahb));
if (err) {
dev_err(s->dev, "Failed to set rate!\n");
- return err;
+ goto disable_clk_ahb;
}
err = clk_prepare_enable(s->clk);
if (err) {
dev_err(s->dev, "Failed to enable clk!\n");
- return err;
+ goto disable_clk_ahb;
}
return 0;
+
+disable_clk_ahb:
+ clk_disable_unprepare(s->clk_ahb);
+ return err;
}
/*
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index ea4ffc2..d391650 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -31,6 +31,7 @@
#include <linux/dmi.h>
#include <linux/nmi.h>
#include <linux/delay.h>
+#include <linux/of.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
@@ -1603,7 +1604,7 @@
}
#endif /* CONFIG_CONSOLE_POLL */
-static struct uart_ops pch_uart_ops = {
+static const struct uart_ops pch_uart_ops = {
.tx_empty = pch_uart_tx_empty,
.set_mctrl = pch_uart_set_mctrl,
.get_mctrl = pch_uart_get_mctrl,
@@ -1826,6 +1827,10 @@
priv->trigger_level = 1;
priv->fcr = 0;
+ if (pdev->dev.of_node)
+ of_property_read_u32(pdev->dev.of_node, "clock-frequency"
+ , &user_uartclk);
+
#ifdef CONFIG_SERIAL_PCH_UART_CONSOLE
pch_uart_ports[board->line_no] = priv;
#endif
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index ae2095a..f44615f 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1577,7 +1577,7 @@
}
-#ifdef CONFIG_CPU_FREQ
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ
static int s3c24xx_serial_cpufreq_transition(struct notifier_block *nb,
unsigned long val, void *data)
diff --git a/drivers/tty/serial/samsung.h b/drivers/tty/serial/samsung.h
index 2ae4fce..a04acef 100644
--- a/drivers/tty/serial/samsung.h
+++ b/drivers/tty/serial/samsung.h
@@ -102,7 +102,7 @@
struct s3c24xx_uart_dma *dma;
-#ifdef CONFIG_CPU_FREQ
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ
struct notifier_block freq_transition;
#endif
};
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index f36e6df..a9d94f7 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1205,6 +1205,10 @@
}
#endif
+ /* reset device, purging any pending irq / data */
+ regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT,
+ SC16IS7XX_IOCONTROL_SRESET_BIT);
+
for (i = 0; i < devtype->nr_uart; ++i) {
s->p[i].line = i;
/* Initialize port data */
@@ -1234,6 +1238,22 @@
init_kthread_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
/* Register port */
uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+
+ /* Enable EFR */
+ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
+ SC16IS7XX_LCR_CONF_MODE_B);
+
+ regcache_cache_bypass(s->regmap, true);
+
+ /* Enable write access to enhanced features */
+ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_EFR_REG,
+ SC16IS7XX_EFR_ENABLE_BIT);
+
+ regcache_cache_bypass(s->regmap, false);
+
+ /* Restore access to general registers */
+ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, 0x00);
+
/* Go to suspend mode */
sc16is7xx_power(&s->p[i].port, 0);
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 9fc1533..6e4f636 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -235,18 +235,9 @@
if (tty_port_initialized(port))
return 0;
- /*
- * Set the TTY IO error marker - we will only clear this
- * once we have successfully opened the port.
- */
- set_bit(TTY_IO_ERROR, &tty->flags);
-
retval = uart_port_startup(tty, state, init_hw);
- if (!retval) {
- tty_port_set_initialized(port, 1);
- clear_bit(TTY_IO_ERROR, &tty->flags);
- } else if (retval > 0)
- retval = 0;
+ if (retval)
+ set_bit(TTY_IO_ERROR, &tty->flags);
return retval;
}
@@ -972,8 +963,11 @@
}
uart_change_speed(tty, state, NULL);
}
- } else
+ } else {
retval = uart_startup(tty, state, 1);
+ if (retval > 0)
+ retval = 0;
+ }
exit:
return retval;
}
@@ -1139,6 +1133,8 @@
uport->ops->config_port(uport, flags);
ret = uart_startup(tty, state, 1);
+ if (ret > 0)
+ ret = 0;
}
out:
mutex_unlock(&port->mutex);
@@ -1465,7 +1461,6 @@
{
struct uart_state *state = tty->driver_data;
struct tty_port *port;
- struct uart_port *uport;
if (!state) {
struct uart_driver *drv = tty->driver->driver_state;
@@ -1481,56 +1476,36 @@
port = &state->port;
pr_debug("uart_close(%d) called\n", tty->index);
- if (tty_port_close_start(port, tty, filp) == 0)
- return;
+ tty_port_close(tty->port, tty, filp);
+}
- mutex_lock(&port->mutex);
- uport = uart_port_check(state);
+static void uart_tty_port_shutdown(struct tty_port *port)
+{
+ struct uart_state *state = container_of(port, struct uart_state, port);
+ struct uart_port *uport = uart_port_check(state);
/*
* At this point, we stop accepting input. To do this, we
* disable the receive line status interrupts.
*/
- if (tty_port_initialized(port) &&
- !WARN(!uport, "detached port still initialized!\n")) {
- spin_lock_irq(&uport->lock);
- uport->ops->stop_rx(uport);
- spin_unlock_irq(&uport->lock);
- /*
- * Before we drop DTR, make sure the UART transmitter
- * has completely drained; this is especially
- * important if there is a transmit FIFO!
- */
- uart_wait_until_sent(tty, uport->timeout);
- }
+ if (WARN(!uport, "detached port still initialized!\n"))
+ return;
- uart_shutdown(tty, state);
- tty_port_tty_set(port, NULL);
+ spin_lock_irq(&uport->lock);
+ uport->ops->stop_rx(uport);
+ spin_unlock_irq(&uport->lock);
- spin_lock_irq(&port->lock);
-
- if (port->blocked_open) {
- spin_unlock_irq(&port->lock);
- if (port->close_delay)
- msleep_interruptible(jiffies_to_msecs(port->close_delay));
- spin_lock_irq(&port->lock);
- } else if (uport && !uart_console(uport)) {
- spin_unlock_irq(&port->lock);
- uart_change_pm(state, UART_PM_STATE_OFF);
- spin_lock_irq(&port->lock);
- }
- spin_unlock_irq(&port->lock);
- tty_port_set_active(port, 0);
+ uart_port_shutdown(port);
/*
- * Wake up anyone trying to open this port.
+ * It's possible for shutdown to be called after suspend if we get
+ * a DCD drop (hangup) at just the right time. Clear suspended bit so
+ * we don't try to resume a port that has been shutdown.
*/
- wake_up_interruptible(&port->open_wait);
+ tty_port_set_suspended(port, 0);
- mutex_unlock(&port->mutex);
+ uart_change_pm(state, UART_PM_STATE_OFF);
- tty_ldisc_flush(tty);
- tty->closing = 0;
}
static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
@@ -1711,52 +1686,31 @@
struct uart_driver *drv = tty->driver->driver_state;
int retval, line = tty->index;
struct uart_state *state = drv->state + line;
- struct tty_port *port = &state->port;
- struct uart_port *uport;
-
- pr_debug("uart_open(%d) called\n", line);
-
- spin_lock_irq(&port->lock);
- ++port->count;
- spin_unlock_irq(&port->lock);
-
- /*
- * We take the semaphore here to guarantee that we won't be re-entered
- * while allocating the state structure, or while we request any IRQs
- * that the driver may need. This also has the nice side-effect that
- * it delays the action of uart_hangup, so we can guarantee that
- * state->port.tty will always contain something reasonable.
- */
- if (mutex_lock_interruptible(&port->mutex)) {
- retval = -ERESTARTSYS;
- goto end;
- }
-
- uport = uart_port_check(state);
- if (!uport || uport->flags & UPF_DEAD) {
- retval = -ENXIO;
- goto err_unlock;
- }
tty->driver_data = state;
- uport->state = state;
+
+ retval = tty_port_open(&state->port, tty, filp);
+ if (retval > 0)
+ retval = 0;
+
+ return retval;
+}
+
+static int uart_port_activate(struct tty_port *port, struct tty_struct *tty)
+{
+ struct uart_state *state = container_of(port, struct uart_state, port);
+ struct uart_port *uport;
+
+ uport = uart_port_check(state);
+ if (!uport || uport->flags & UPF_DEAD)
+ return -ENXIO;
+
port->low_latency = (uport->flags & UPF_LOW_LATENCY) ? 1 : 0;
- tty_port_tty_set(port, tty);
/*
* Start up the serial port.
*/
- retval = uart_startup(tty, state, 0);
-
- /*
- * If we succeeded, wait until the port is ready.
- */
-err_unlock:
- mutex_unlock(&port->mutex);
- if (retval == 0)
- retval = tty_port_block_til_ready(port, tty, filp);
-end:
- return retval;
+ return uart_startup(tty, state, 0);
}
static const char *uart_type(struct uart_port *port)
@@ -1940,7 +1894,7 @@
*
* Returns 0 on success or -EINVAL on failure
*/
-int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr,
+int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
char **options)
{
if (strncmp(p, "mmio,", 5) == 0) {
@@ -1968,7 +1922,11 @@
return -EINVAL;
}
- *addr = simple_strtoul(p, NULL, 0);
+ /*
+ * Before you replace it with kstrtoull(), think about options separator
+ * (',') it will not tolerate
+ */
+ *addr = simple_strtoull(p, NULL, 0);
p = strchr(p, ',');
if (p)
p++;
@@ -2470,6 +2428,8 @@
static const struct tty_port_operations uart_port_ops = {
.carrier_raised = uart_carrier_raised,
.dtr_rts = uart_dtr_rts,
+ .activate = uart_port_activate,
+ .shutdown = uart_tty_port_shutdown,
};
/**
@@ -2786,6 +2746,8 @@
uport->cons = drv->cons;
uport->minor = drv->tty_driver->minor_start + uport->line;
+ port->console = uart_console(uport);
+
/*
* If this port is a console, then the spinlock is already
* initialised.
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index d86eee3..4b26252 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2533,7 +2533,7 @@
return 0;
}
-static struct uart_ops sci_uart_ops = {
+static const struct uart_ops sci_uart_ops = {
.tx_empty = sci_tx_empty,
.set_mctrl = sci_set_mctrl,
.get_mctrl = sci_get_mctrl,
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index 2d78cb3..379e5bd 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -639,7 +639,7 @@
/*---------------------------------------------------------------------*/
-static struct uart_ops asc_uart_ops = {
+static const struct uart_ops asc_uart_ops = {
.tx_empty = asc_tx_empty,
.set_mctrl = asc_set_mctrl,
.get_mctrl = asc_get_mctrl,
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index f89d1f7..0338562 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -1,6 +1,7 @@
/*
* Copyright (C) Maxime Coquelin 2015
- * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * Authors: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * Gerald Baeza <gerald.baeza@st.com>
* License terms: GNU General Public License (GPL), version 2
*
* Inspired by st-asc.c from STMicroelectronics (c)
@@ -10,120 +11,31 @@
#define SUPPORT_SYSRQ
#endif
-#include <linux/module.h>
-#include <linux/serial.h>
+#include <linux/clk.h>
#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/pm_runtime.h>
+#include <linux/dma-direction.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/serial_core.h>
-#include <linux/clk.h>
+#include <linux/serial.h>
+#include <linux/spinlock.h>
+#include <linux/sysrq.h>
+#include <linux/tty_flip.h>
+#include <linux/tty.h>
-#define DRIVER_NAME "stm32-usart"
-
-/* Register offsets */
-#define USART_SR 0x00
-#define USART_DR 0x04
-#define USART_BRR 0x08
-#define USART_CR1 0x0c
-#define USART_CR2 0x10
-#define USART_CR3 0x14
-#define USART_GTPR 0x18
-
-/* USART_SR */
-#define USART_SR_PE BIT(0)
-#define USART_SR_FE BIT(1)
-#define USART_SR_NF BIT(2)
-#define USART_SR_ORE BIT(3)
-#define USART_SR_IDLE BIT(4)
-#define USART_SR_RXNE BIT(5)
-#define USART_SR_TC BIT(6)
-#define USART_SR_TXE BIT(7)
-#define USART_SR_LBD BIT(8)
-#define USART_SR_CTS BIT(9)
-#define USART_SR_ERR_MASK (USART_SR_LBD | USART_SR_ORE | \
- USART_SR_FE | USART_SR_PE)
-/* Dummy bits */
-#define USART_SR_DUMMY_RX BIT(16)
-
-/* USART_DR */
-#define USART_DR_MASK GENMASK(8, 0)
-
-/* USART_BRR */
-#define USART_BRR_DIV_F_MASK GENMASK(3, 0)
-#define USART_BRR_DIV_M_MASK GENMASK(15, 4)
-#define USART_BRR_DIV_M_SHIFT 4
-
-/* USART_CR1 */
-#define USART_CR1_SBK BIT(0)
-#define USART_CR1_RWU BIT(1)
-#define USART_CR1_RE BIT(2)
-#define USART_CR1_TE BIT(3)
-#define USART_CR1_IDLEIE BIT(4)
-#define USART_CR1_RXNEIE BIT(5)
-#define USART_CR1_TCIE BIT(6)
-#define USART_CR1_TXEIE BIT(7)
-#define USART_CR1_PEIE BIT(8)
-#define USART_CR1_PS BIT(9)
-#define USART_CR1_PCE BIT(10)
-#define USART_CR1_WAKE BIT(11)
-#define USART_CR1_M BIT(12)
-#define USART_CR1_UE BIT(13)
-#define USART_CR1_OVER8 BIT(15)
-#define USART_CR1_IE_MASK GENMASK(8, 4)
-
-/* USART_CR2 */
-#define USART_CR2_ADD_MASK GENMASK(3, 0)
-#define USART_CR2_LBDL BIT(5)
-#define USART_CR2_LBDIE BIT(6)
-#define USART_CR2_LBCL BIT(8)
-#define USART_CR2_CPHA BIT(9)
-#define USART_CR2_CPOL BIT(10)
-#define USART_CR2_CLKEN BIT(11)
-#define USART_CR2_STOP_2B BIT(13)
-#define USART_CR2_STOP_MASK GENMASK(13, 12)
-#define USART_CR2_LINEN BIT(14)
-
-/* USART_CR3 */
-#define USART_CR3_EIE BIT(0)
-#define USART_CR3_IREN BIT(1)
-#define USART_CR3_IRLP BIT(2)
-#define USART_CR3_HDSEL BIT(3)
-#define USART_CR3_NACK BIT(4)
-#define USART_CR3_SCEN BIT(5)
-#define USART_CR3_DMAR BIT(6)
-#define USART_CR3_DMAT BIT(7)
-#define USART_CR3_RTSE BIT(8)
-#define USART_CR3_CTSE BIT(9)
-#define USART_CR3_CTSIE BIT(10)
-#define USART_CR3_ONEBIT BIT(11)
-
-/* USART_GTPR */
-#define USART_GTPR_PSC_MASK GENMASK(7, 0)
-#define USART_GTPR_GT_MASK GENMASK(15, 8)
-
-#define DRIVER_NAME "stm32-usart"
-#define STM32_SERIAL_NAME "ttyS"
-#define STM32_MAX_PORTS 6
-
-struct stm32_port {
- struct uart_port port;
- struct clk *clk;
- bool hw_flow_control;
-};
-
-static struct stm32_port stm32_ports[STM32_MAX_PORTS];
-static struct uart_driver stm32_usart_driver;
+#include "stm32-usart.h"
static void stm32_stop_tx(struct uart_port *port);
+static void stm32_transmit_chars(struct uart_port *port);
static inline struct stm32_port *to_stm32_port(struct uart_port *port)
{
@@ -148,19 +60,64 @@
writel_relaxed(val, port->membase + reg);
}
-static void stm32_receive_chars(struct uart_port *port)
+static int stm32_pending_rx(struct uart_port *port, u32 *sr, int *last_res,
+ bool threaded)
+{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ enum dma_status status;
+ struct dma_tx_state state;
+
+ *sr = readl_relaxed(port->membase + ofs->isr);
+
+ if (threaded && stm32_port->rx_ch) {
+ status = dmaengine_tx_status(stm32_port->rx_ch,
+ stm32_port->rx_ch->cookie,
+ &state);
+ if ((status == DMA_IN_PROGRESS) &&
+ (*last_res != state.residue))
+ return 1;
+ else
+ return 0;
+ } else if (*sr & USART_SR_RXNE) {
+ return 1;
+ }
+ return 0;
+}
+
+static unsigned long
+stm32_get_char(struct uart_port *port, u32 *sr, int *last_res)
+{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ unsigned long c;
+
+ if (stm32_port->rx_ch) {
+ c = stm32_port->rx_buf[RX_BUF_L - (*last_res)--];
+ if ((*last_res) == 0)
+ *last_res = RX_BUF_L;
+ return c;
+ } else {
+ return readl_relaxed(port->membase + ofs->rdr);
+ }
+}
+
+static void stm32_receive_chars(struct uart_port *port, bool threaded)
{
struct tty_port *tport = &port->state->port;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
unsigned long c;
u32 sr;
char flag;
+ static int last_res = RX_BUF_L;
if (port->irq_wake)
pm_wakeup_event(tport->tty->dev, 0);
- while ((sr = readl_relaxed(port->membase + USART_SR)) & USART_SR_RXNE) {
+ while (stm32_pending_rx(port, &sr, &last_res, threaded)) {
sr |= USART_SR_DUMMY_RX;
- c = readl_relaxed(port->membase + USART_DR);
+ c = stm32_get_char(port, &sr, &last_res);
flag = TTY_NORMAL;
port->icount.rx++;
@@ -170,6 +127,10 @@
if (uart_handle_break(port))
continue;
} else if (sr & USART_SR_ORE) {
+ if (ofs->icr != UNDEF_REG)
+ writel_relaxed(USART_ICR_ORECF,
+ port->membase +
+ ofs->icr);
port->icount.overrun++;
} else if (sr & USART_SR_PE) {
port->icount.parity++;
@@ -197,14 +158,138 @@
spin_lock(&port->lock);
}
+static void stm32_tx_dma_complete(void *arg)
+{
+ struct uart_port *port = arg;
+ struct stm32_port *stm32port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+ unsigned int isr;
+ int ret;
+
+ ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+ isr,
+ (isr & USART_SR_TC),
+ 10, 100000);
+
+ if (ret)
+ dev_err(port->dev, "terminal count not set\n");
+
+ if (ofs->icr == UNDEF_REG)
+ stm32_clr_bits(port, ofs->isr, USART_SR_TC);
+ else
+ stm32_set_bits(port, ofs->icr, USART_CR_TC);
+
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+ stm32port->tx_dma_busy = false;
+
+ /* Let's see if we have pending data to send */
+ stm32_transmit_chars(port);
+}
+
+static void stm32_transmit_chars_pio(struct uart_port *port)
+{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct circ_buf *xmit = &port->state->xmit;
+ unsigned int isr;
+ int ret;
+
+ if (stm32_port->tx_dma_busy) {
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+ stm32_port->tx_dma_busy = false;
+ }
+
+ ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+ isr,
+ (isr & USART_SR_TXE),
+ 10, 100);
+
+ if (ret)
+ dev_err(port->dev, "tx empty not set\n");
+
+ stm32_set_bits(port, ofs->cr1, USART_CR1_TXEIE);
+
+ writel_relaxed(xmit->buf[xmit->tail], port->membase + ofs->tdr);
+ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+ port->icount.tx++;
+}
+
+static void stm32_transmit_chars_dma(struct uart_port *port)
+{
+ struct stm32_port *stm32port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+ struct circ_buf *xmit = &port->state->xmit;
+ struct dma_async_tx_descriptor *desc = NULL;
+ dma_cookie_t cookie;
+ unsigned int count, i;
+
+ if (stm32port->tx_dma_busy)
+ return;
+
+ stm32port->tx_dma_busy = true;
+
+ count = uart_circ_chars_pending(xmit);
+
+ if (count > TX_BUF_L)
+ count = TX_BUF_L;
+
+ if (xmit->tail < xmit->head) {
+ memcpy(&stm32port->tx_buf[0], &xmit->buf[xmit->tail], count);
+ } else {
+ size_t one = UART_XMIT_SIZE - xmit->tail;
+ size_t two;
+
+ if (one > count)
+ one = count;
+ two = count - one;
+
+ memcpy(&stm32port->tx_buf[0], &xmit->buf[xmit->tail], one);
+ if (two)
+ memcpy(&stm32port->tx_buf[one], &xmit->buf[0], two);
+ }
+
+ desc = dmaengine_prep_slave_single(stm32port->tx_ch,
+ stm32port->tx_dma_buf,
+ count,
+ DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT);
+
+ if (!desc) {
+ for (i = count; i > 0; i--)
+ stm32_transmit_chars_pio(port);
+ return;
+ }
+
+ desc->callback = stm32_tx_dma_complete;
+ desc->callback_param = port;
+
+ /* Push current DMA TX transaction in the pending queue */
+ cookie = dmaengine_submit(desc);
+
+ /* Issue pending DMA TX requests */
+ dma_async_issue_pending(stm32port->tx_ch);
+
+ stm32_clr_bits(port, ofs->isr, USART_SR_TC);
+ stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+ xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
+ port->icount.tx += count;
+}
+
static void stm32_transmit_chars(struct uart_port *port)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
struct circ_buf *xmit = &port->state->xmit;
if (port->x_char) {
- writel_relaxed(port->x_char, port->membase + USART_DR);
+ if (stm32_port->tx_dma_busy)
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+ writel_relaxed(port->x_char, port->membase + ofs->tdr);
port->x_char = 0;
port->icount.tx++;
+ if (stm32_port->tx_dma_busy)
+ stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT);
return;
}
@@ -218,9 +303,10 @@
return;
}
- writel_relaxed(xmit->buf[xmit->tail], port->membase + USART_DR);
- xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
- port->icount.tx++;
+ if (stm32_port->tx_ch)
+ stm32_transmit_chars_dma(port);
+ else
+ stm32_transmit_chars_pio(port);
if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
uart_write_wakeup(port);
@@ -232,34 +318,60 @@
static irqreturn_t stm32_interrupt(int irq, void *ptr)
{
struct uart_port *port = ptr;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
u32 sr;
spin_lock(&port->lock);
- sr = readl_relaxed(port->membase + USART_SR);
+ sr = readl_relaxed(port->membase + ofs->isr);
- if (sr & USART_SR_RXNE)
- stm32_receive_chars(port);
+ if ((sr & USART_SR_RXNE) && !(stm32_port->rx_ch))
+ stm32_receive_chars(port, false);
- if (sr & USART_SR_TXE)
+ if ((sr & USART_SR_TXE) && !(stm32_port->tx_ch))
stm32_transmit_chars(port);
spin_unlock(&port->lock);
+ if (stm32_port->rx_ch)
+ return IRQ_WAKE_THREAD;
+ else
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t stm32_threaded_interrupt(int irq, void *ptr)
+{
+ struct uart_port *port = ptr;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+
+ spin_lock(&port->lock);
+
+ if (stm32_port->rx_ch)
+ stm32_receive_chars(port, true);
+
+ spin_unlock(&port->lock);
+
return IRQ_HANDLED;
}
static unsigned int stm32_tx_empty(struct uart_port *port)
{
- return readl_relaxed(port->membase + USART_SR) & USART_SR_TXE;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+ return readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE;
}
static void stm32_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
- stm32_set_bits(port, USART_CR3, USART_CR3_RTSE);
+ stm32_set_bits(port, ofs->cr3, USART_CR3_RTSE);
else
- stm32_clr_bits(port, USART_CR3, USART_CR3_RTSE);
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_RTSE);
}
static unsigned int stm32_get_mctrl(struct uart_port *port)
@@ -271,7 +383,10 @@
/* Transmit stop */
static void stm32_stop_tx(struct uart_port *port)
{
- stm32_clr_bits(port, USART_CR1, USART_CR1_TXEIE);
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+ stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE);
}
/* There are probably characters waiting to be transmitted. */
@@ -282,33 +397,40 @@
if (uart_circ_empty(xmit))
return;
- stm32_set_bits(port, USART_CR1, USART_CR1_TXEIE | USART_CR1_TE);
+ stm32_transmit_chars(port);
}
/* Throttle the remote when input buffer is about to overflow. */
static void stm32_throttle(struct uart_port *port)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
- stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+ stm32_clr_bits(port, ofs->cr1, USART_CR1_RXNEIE);
spin_unlock_irqrestore(&port->lock, flags);
}
/* Unthrottle the remote, the input buffer can now accept data. */
static void stm32_unthrottle(struct uart_port *port)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
- stm32_set_bits(port, USART_CR1, USART_CR1_RXNEIE);
+ stm32_set_bits(port, ofs->cr1, USART_CR1_RXNEIE);
spin_unlock_irqrestore(&port->lock, flags);
}
/* Receive stop */
static void stm32_stop_rx(struct uart_port *port)
{
- stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+ stm32_clr_bits(port, ofs->cr1, USART_CR1_RXNEIE);
}
/* Handle breaks - ignored by us */
@@ -318,26 +440,34 @@
static int stm32_startup(struct uart_port *port)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
const char *name = to_platform_device(port->dev)->name;
u32 val;
int ret;
- ret = request_irq(port->irq, stm32_interrupt, 0, name, port);
+ ret = request_threaded_irq(port->irq, stm32_interrupt,
+ stm32_threaded_interrupt,
+ IRQF_NO_SUSPEND, name, port);
if (ret)
return ret;
val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
- stm32_set_bits(port, USART_CR1, val);
+ stm32_set_bits(port, ofs->cr1, val);
return 0;
}
static void stm32_shutdown(struct uart_port *port)
{
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct stm32_usart_config *cfg = &stm32_port->info->cfg;
u32 val;
val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
- stm32_set_bits(port, USART_CR1, val);
+ val |= BIT(cfg->uart_enable_bit);
+ stm32_clr_bits(port, ofs->cr1, val);
free_irq(port->irq, port);
}
@@ -346,6 +476,8 @@
struct ktermios *old)
{
struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct stm32_usart_config *cfg = &stm32_port->info->cfg;
unsigned int baud;
u32 usartdiv, mantissa, fraction, oversampling;
tcflag_t cflag = termios->c_cflag;
@@ -360,9 +492,10 @@
spin_lock_irqsave(&port->lock, flags);
/* Stop serial port and reset value */
- writel_relaxed(0, port->membase + USART_CR1);
+ writel_relaxed(0, port->membase + ofs->cr1);
- cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_UE | USART_CR1_RXNEIE;
+ cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_RXNEIE;
+ cr1 |= BIT(cfg->uart_enable_bit);
cr2 = 0;
cr3 = 0;
@@ -371,8 +504,12 @@
if (cflag & PARENB) {
cr1 |= USART_CR1_PCE;
- if ((cflag & CSIZE) == CS8)
- cr1 |= USART_CR1_M;
+ if ((cflag & CSIZE) == CS8) {
+ if (cfg->has_7bits_data)
+ cr1 |= USART_CR1_M0;
+ else
+ cr1 |= USART_CR1_M;
+ }
}
if (cflag & PARODD)
@@ -394,15 +531,15 @@
*/
if (usartdiv < 16) {
oversampling = 8;
- stm32_set_bits(port, USART_CR1, USART_CR1_OVER8);
+ stm32_set_bits(port, ofs->cr1, USART_CR1_OVER8);
} else {
oversampling = 16;
- stm32_clr_bits(port, USART_CR1, USART_CR1_OVER8);
+ stm32_clr_bits(port, ofs->cr1, USART_CR1_OVER8);
}
mantissa = (usartdiv / oversampling) << USART_BRR_DIV_M_SHIFT;
fraction = usartdiv % oversampling;
- writel_relaxed(mantissa | fraction, port->membase + USART_BRR);
+ writel_relaxed(mantissa | fraction, port->membase + ofs->brr);
uart_update_timeout(port, cflag, baud);
@@ -430,9 +567,12 @@
if ((termios->c_cflag & CREAD) == 0)
port->ignore_status_mask |= USART_SR_DUMMY_RX;
- writel_relaxed(cr3, port->membase + USART_CR3);
- writel_relaxed(cr2, port->membase + USART_CR2);
- writel_relaxed(cr1, port->membase + USART_CR1);
+ if (stm32_port->rx_ch)
+ cr3 |= USART_CR3_DMAR;
+
+ writel_relaxed(cr3, port->membase + ofs->cr3);
+ writel_relaxed(cr2, port->membase + ofs->cr2);
+ writel_relaxed(cr1, port->membase + ofs->cr1);
spin_unlock_irqrestore(&port->lock, flags);
}
@@ -469,6 +609,8 @@
{
struct stm32_port *stm32port = container_of(port,
struct stm32_port, port);
+ struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+ struct stm32_usart_config *cfg = &stm32port->info->cfg;
unsigned long flags = 0;
switch (state) {
@@ -477,7 +619,7 @@
break;
case UART_PM_STATE_OFF:
spin_lock_irqsave(&port->lock, flags);
- stm32_clr_bits(port, USART_CR1, USART_CR1_UE);
+ stm32_clr_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit));
spin_unlock_irqrestore(&port->lock, flags);
clk_disable_unprepare(stm32port->clk);
break;
@@ -539,8 +681,6 @@
if (!stm32port->port.uartclk)
ret = -EINVAL;
- clk_disable_unprepare(stm32port->clk);
-
return ret;
}
@@ -560,30 +700,162 @@
return NULL;
stm32_ports[id].hw_flow_control = of_property_read_bool(np,
- "auto-flow-control");
+ "st,hw-flow-ctrl");
stm32_ports[id].port.line = id;
return &stm32_ports[id];
}
#ifdef CONFIG_OF
static const struct of_device_id stm32_match[] = {
- { .compatible = "st,stm32-usart", },
- { .compatible = "st,stm32-uart", },
+ { .compatible = "st,stm32-usart", .data = &stm32f4_info},
+ { .compatible = "st,stm32-uart", .data = &stm32f4_info},
+ { .compatible = "st,stm32f7-usart", .data = &stm32f7_info},
+ { .compatible = "st,stm32f7-uart", .data = &stm32f7_info},
{},
};
MODULE_DEVICE_TABLE(of, stm32_match);
#endif
+static int stm32_of_dma_rx_probe(struct stm32_port *stm32port,
+ struct platform_device *pdev)
+{
+ struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+ struct uart_port *port = &stm32port->port;
+ struct device *dev = &pdev->dev;
+ struct dma_slave_config config;
+ struct dma_async_tx_descriptor *desc = NULL;
+ dma_cookie_t cookie;
+ int ret;
+
+ /* Request DMA RX channel */
+ stm32port->rx_ch = dma_request_slave_channel(dev, "rx");
+ if (!stm32port->rx_ch) {
+ dev_info(dev, "rx dma alloc failed\n");
+ return -ENODEV;
+ }
+ stm32port->rx_buf = dma_alloc_coherent(&pdev->dev, RX_BUF_L,
+ &stm32port->rx_dma_buf,
+ GFP_KERNEL);
+ if (!stm32port->rx_buf) {
+ ret = -ENOMEM;
+ goto alloc_err;
+ }
+
+ /* Configure DMA channel */
+ memset(&config, 0, sizeof(config));
+ config.src_addr = port->mapbase + ofs->rdr;
+ config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+ ret = dmaengine_slave_config(stm32port->rx_ch, &config);
+ if (ret < 0) {
+ dev_err(dev, "rx dma channel config failed\n");
+ ret = -ENODEV;
+ goto config_err;
+ }
+
+ /* Prepare a DMA cyclic transaction */
+ desc = dmaengine_prep_dma_cyclic(stm32port->rx_ch,
+ stm32port->rx_dma_buf,
+ RX_BUF_L, RX_BUF_P, DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT);
+ if (!desc) {
+ dev_err(dev, "rx dma prep cyclic failed\n");
+ ret = -ENODEV;
+ goto config_err;
+ }
+
+ /* No callback as dma buffer is drained on usart interrupt */
+ desc->callback = NULL;
+ desc->callback_param = NULL;
+
+ /* Push current DMA transaction in the pending queue */
+ cookie = dmaengine_submit(desc);
+
+ /* Issue pending DMA requests */
+ dma_async_issue_pending(stm32port->rx_ch);
+
+ return 0;
+
+config_err:
+ dma_free_coherent(&pdev->dev,
+ RX_BUF_L, stm32port->rx_buf,
+ stm32port->rx_dma_buf);
+
+alloc_err:
+ dma_release_channel(stm32port->rx_ch);
+ stm32port->rx_ch = NULL;
+
+ return ret;
+}
+
+static int stm32_of_dma_tx_probe(struct stm32_port *stm32port,
+ struct platform_device *pdev)
+{
+ struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+ struct uart_port *port = &stm32port->port;
+ struct device *dev = &pdev->dev;
+ struct dma_slave_config config;
+ int ret;
+
+ stm32port->tx_dma_busy = false;
+
+ /* Request DMA TX channel */
+ stm32port->tx_ch = dma_request_slave_channel(dev, "tx");
+ if (!stm32port->tx_ch) {
+ dev_info(dev, "tx dma alloc failed\n");
+ return -ENODEV;
+ }
+ stm32port->tx_buf = dma_alloc_coherent(&pdev->dev, TX_BUF_L,
+ &stm32port->tx_dma_buf,
+ GFP_KERNEL);
+ if (!stm32port->tx_buf) {
+ ret = -ENOMEM;
+ goto alloc_err;
+ }
+
+ /* Configure DMA channel */
+ memset(&config, 0, sizeof(config));
+ config.dst_addr = port->mapbase + ofs->tdr;
+ config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+ ret = dmaengine_slave_config(stm32port->tx_ch, &config);
+ if (ret < 0) {
+ dev_err(dev, "tx dma channel config failed\n");
+ ret = -ENODEV;
+ goto config_err;
+ }
+
+ return 0;
+
+config_err:
+ dma_free_coherent(&pdev->dev,
+ TX_BUF_L, stm32port->tx_buf,
+ stm32port->tx_dma_buf);
+
+alloc_err:
+ dma_release_channel(stm32port->tx_ch);
+ stm32port->tx_ch = NULL;
+
+ return ret;
+}
+
static int stm32_serial_probe(struct platform_device *pdev)
{
- int ret;
+ const struct of_device_id *match;
struct stm32_port *stm32port;
+ int ret;
stm32port = stm32_of_get_stm32_port(pdev);
if (!stm32port)
return -ENODEV;
+ match = of_match_device(stm32_match, &pdev->dev);
+ if (match && match->data)
+ stm32port->info = (struct stm32_usart_info *)match->data;
+ else
+ return -EINVAL;
+
ret = stm32_init_port(stm32port, pdev);
if (ret)
return ret;
@@ -592,6 +864,14 @@
if (ret)
return ret;
+ ret = stm32_of_dma_rx_probe(stm32port, pdev);
+ if (ret)
+ dev_info(&pdev->dev, "interrupt mode used for rx (no dma)\n");
+
+ ret = stm32_of_dma_tx_probe(stm32port, pdev);
+ if (ret)
+ dev_info(&pdev->dev, "interrupt mode used for tx (no dma)\n");
+
platform_set_drvdata(pdev, &stm32port->port);
return 0;
@@ -600,6 +880,30 @@
static int stm32_serial_remove(struct platform_device *pdev)
{
struct uart_port *port = platform_get_drvdata(pdev);
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAR);
+
+ if (stm32_port->rx_ch)
+ dma_release_channel(stm32_port->rx_ch);
+
+ if (stm32_port->rx_dma_buf)
+ dma_free_coherent(&pdev->dev,
+ RX_BUF_L, stm32_port->rx_buf,
+ stm32_port->rx_dma_buf);
+
+ stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+ if (stm32_port->tx_ch)
+ dma_release_channel(stm32_port->tx_ch);
+
+ if (stm32_port->tx_dma_buf)
+ dma_free_coherent(&pdev->dev,
+ TX_BUF_L, stm32_port->tx_buf,
+ stm32_port->tx_dma_buf);
+
+ clk_disable_unprepare(stm32_port->clk);
return uart_remove_one_port(&stm32_usart_driver, port);
}
@@ -608,15 +912,21 @@
#ifdef CONFIG_SERIAL_STM32_CONSOLE
static void stm32_console_putchar(struct uart_port *port, int ch)
{
- while (!(readl_relaxed(port->membase + USART_SR) & USART_SR_TXE))
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+ while (!(readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE))
cpu_relax();
- writel_relaxed(ch, port->membase + USART_DR);
+ writel_relaxed(ch, port->membase + ofs->tdr);
}
static void stm32_console_write(struct console *co, const char *s, unsigned cnt)
{
struct uart_port *port = &stm32_ports[co->index].port;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct stm32_usart_config *cfg = &stm32_port->info->cfg;
unsigned long flags;
u32 old_cr1, new_cr1;
int locked = 1;
@@ -629,15 +939,16 @@
else
spin_lock(&port->lock);
- /* Save and disable interrupts */
- old_cr1 = readl_relaxed(port->membase + USART_CR1);
+ /* Save and disable interrupts, enable the transmitter */
+ old_cr1 = readl_relaxed(port->membase + ofs->cr1);
new_cr1 = old_cr1 & ~USART_CR1_IE_MASK;
- writel_relaxed(new_cr1, port->membase + USART_CR1);
+ new_cr1 |= USART_CR1_TE | BIT(cfg->uart_enable_bit);
+ writel_relaxed(new_cr1, port->membase + ofs->cr1);
uart_console_write(port, s, cnt, stm32_console_putchar);
/* Restore interrupt state */
- writel_relaxed(old_cr1, port->membase + USART_CR1);
+ writel_relaxed(old_cr1, port->membase + ofs->cr1);
if (locked)
spin_unlock(&port->lock);
diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h
new file mode 100644
index 0000000..41d9749
--- /dev/null
+++ b/drivers/tty/serial/stm32-usart.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Authors: Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * Gerald Baeza <gerald_baeza@yahoo.fr>
+ * License terms: GNU General Public License (GPL), version 2
+ */
+
+#define DRIVER_NAME "stm32-usart"
+
+struct stm32_usart_offsets {
+ u8 cr1;
+ u8 cr2;
+ u8 cr3;
+ u8 brr;
+ u8 gtpr;
+ u8 rtor;
+ u8 rqr;
+ u8 isr;
+ u8 icr;
+ u8 rdr;
+ u8 tdr;
+};
+
+struct stm32_usart_config {
+ u8 uart_enable_bit; /* USART_CR1_UE */
+ bool has_7bits_data;
+};
+
+struct stm32_usart_info {
+ struct stm32_usart_offsets ofs;
+ struct stm32_usart_config cfg;
+};
+
+#define UNDEF_REG ~0
+
+/* Register offsets */
+struct stm32_usart_info stm32f4_info = {
+ .ofs = {
+ .isr = 0x00,
+ .rdr = 0x04,
+ .tdr = 0x04,
+ .brr = 0x08,
+ .cr1 = 0x0c,
+ .cr2 = 0x10,
+ .cr3 = 0x14,
+ .gtpr = 0x18,
+ .rtor = UNDEF_REG,
+ .rqr = UNDEF_REG,
+ .icr = UNDEF_REG,
+ },
+ .cfg = {
+ .uart_enable_bit = 13,
+ .has_7bits_data = false,
+ }
+};
+
+struct stm32_usart_info stm32f7_info = {
+ .ofs = {
+ .cr1 = 0x00,
+ .cr2 = 0x04,
+ .cr3 = 0x08,
+ .brr = 0x0c,
+ .gtpr = 0x10,
+ .rtor = 0x14,
+ .rqr = 0x18,
+ .isr = 0x1c,
+ .icr = 0x20,
+ .rdr = 0x24,
+ .tdr = 0x28,
+ },
+ .cfg = {
+ .uart_enable_bit = 0,
+ .has_7bits_data = true,
+ }
+};
+
+/* USART_SR (F4) / USART_ISR (F7) */
+#define USART_SR_PE BIT(0)
+#define USART_SR_FE BIT(1)
+#define USART_SR_NF BIT(2)
+#define USART_SR_ORE BIT(3)
+#define USART_SR_IDLE BIT(4)
+#define USART_SR_RXNE BIT(5)
+#define USART_SR_TC BIT(6)
+#define USART_SR_TXE BIT(7)
+#define USART_SR_LBD BIT(8)
+#define USART_SR_CTSIF BIT(9)
+#define USART_SR_CTS BIT(10) /* F7 */
+#define USART_SR_RTOF BIT(11) /* F7 */
+#define USART_SR_EOBF BIT(12) /* F7 */
+#define USART_SR_ABRE BIT(14) /* F7 */
+#define USART_SR_ABRF BIT(15) /* F7 */
+#define USART_SR_BUSY BIT(16) /* F7 */
+#define USART_SR_CMF BIT(17) /* F7 */
+#define USART_SR_SBKF BIT(18) /* F7 */
+#define USART_SR_TEACK BIT(21) /* F7 */
+#define USART_SR_ERR_MASK (USART_SR_LBD | USART_SR_ORE | \
+ USART_SR_FE | USART_SR_PE)
+/* Dummy bits */
+#define USART_SR_DUMMY_RX BIT(16)
+
+/* USART_ICR (F7) */
+#define USART_CR_TC BIT(6)
+
+/* USART_DR */
+#define USART_DR_MASK GENMASK(8, 0)
+
+/* USART_BRR */
+#define USART_BRR_DIV_F_MASK GENMASK(3, 0)
+#define USART_BRR_DIV_M_MASK GENMASK(15, 4)
+#define USART_BRR_DIV_M_SHIFT 4
+
+/* USART_CR1 */
+#define USART_CR1_SBK BIT(0)
+#define USART_CR1_RWU BIT(1) /* F4 */
+#define USART_CR1_RE BIT(2)
+#define USART_CR1_TE BIT(3)
+#define USART_CR1_IDLEIE BIT(4)
+#define USART_CR1_RXNEIE BIT(5)
+#define USART_CR1_TCIE BIT(6)
+#define USART_CR1_TXEIE BIT(7)
+#define USART_CR1_PEIE BIT(8)
+#define USART_CR1_PS BIT(9)
+#define USART_CR1_PCE BIT(10)
+#define USART_CR1_WAKE BIT(11)
+#define USART_CR1_M BIT(12)
+#define USART_CR1_M0 BIT(12) /* F7 */
+#define USART_CR1_MME BIT(13) /* F7 */
+#define USART_CR1_CMIE BIT(14) /* F7 */
+#define USART_CR1_OVER8 BIT(15)
+#define USART_CR1_DEDT_MASK GENMASK(20, 16) /* F7 */
+#define USART_CR1_DEAT_MASK GENMASK(25, 21) /* F7 */
+#define USART_CR1_RTOIE BIT(26) /* F7 */
+#define USART_CR1_EOBIE BIT(27) /* F7 */
+#define USART_CR1_M1 BIT(28) /* F7 */
+#define USART_CR1_IE_MASK (GENMASK(8, 4) | BIT(14) | BIT(26) | BIT(27))
+
+/* USART_CR2 */
+#define USART_CR2_ADD_MASK GENMASK(3, 0) /* F4 */
+#define USART_CR2_ADDM7 BIT(4) /* F7 */
+#define USART_CR2_LBDL BIT(5)
+#define USART_CR2_LBDIE BIT(6)
+#define USART_CR2_LBCL BIT(8)
+#define USART_CR2_CPHA BIT(9)
+#define USART_CR2_CPOL BIT(10)
+#define USART_CR2_CLKEN BIT(11)
+#define USART_CR2_STOP_2B BIT(13)
+#define USART_CR2_STOP_MASK GENMASK(13, 12)
+#define USART_CR2_LINEN BIT(14)
+#define USART_CR2_SWAP BIT(15) /* F7 */
+#define USART_CR2_RXINV BIT(16) /* F7 */
+#define USART_CR2_TXINV BIT(17) /* F7 */
+#define USART_CR2_DATAINV BIT(18) /* F7 */
+#define USART_CR2_MSBFIRST BIT(19) /* F7 */
+#define USART_CR2_ABREN BIT(20) /* F7 */
+#define USART_CR2_ABRMOD_MASK GENMASK(22, 21) /* F7 */
+#define USART_CR2_RTOEN BIT(23) /* F7 */
+#define USART_CR2_ADD_F7_MASK GENMASK(31, 24) /* F7 */
+
+/* USART_CR3 */
+#define USART_CR3_EIE BIT(0)
+#define USART_CR3_IREN BIT(1)
+#define USART_CR3_IRLP BIT(2)
+#define USART_CR3_HDSEL BIT(3)
+#define USART_CR3_NACK BIT(4)
+#define USART_CR3_SCEN BIT(5)
+#define USART_CR3_DMAR BIT(6)
+#define USART_CR3_DMAT BIT(7)
+#define USART_CR3_RTSE BIT(8)
+#define USART_CR3_CTSE BIT(9)
+#define USART_CR3_CTSIE BIT(10)
+#define USART_CR3_ONEBIT BIT(11)
+#define USART_CR3_OVRDIS BIT(12) /* F7 */
+#define USART_CR3_DDRE BIT(13) /* F7 */
+#define USART_CR3_DEM BIT(14) /* F7 */
+#define USART_CR3_DEP BIT(15) /* F7 */
+#define USART_CR3_SCARCNT_MASK GENMASK(19, 17) /* F7 */
+
+/* USART_GTPR */
+#define USART_GTPR_PSC_MASK GENMASK(7, 0)
+#define USART_GTPR_GT_MASK GENMASK(15, 8)
+
+/* USART_RTOR */
+#define USART_RTOR_RTO_MASK GENMASK(23, 0) /* F7 */
+#define USART_RTOR_BLEN_MASK GENMASK(31, 24) /* F7 */
+
+/* USART_RQR */
+#define USART_RQR_ABRRQ BIT(0) /* F7 */
+#define USART_RQR_SBKRQ BIT(1) /* F7 */
+#define USART_RQR_MMRQ BIT(2) /* F7 */
+#define USART_RQR_RXFRQ BIT(3) /* F7 */
+#define USART_RQR_TXFRQ BIT(4) /* F7 */
+
+/* USART_ICR */
+#define USART_ICR_PECF BIT(0) /* F7 */
+#define USART_ICR_FFECF BIT(1) /* F7 */
+#define USART_ICR_NCF BIT(2) /* F7 */
+#define USART_ICR_ORECF BIT(3) /* F7 */
+#define USART_ICR_IDLECF BIT(4) /* F7 */
+#define USART_ICR_TCCF BIT(6) /* F7 */
+#define USART_ICR_LBDCF BIT(8) /* F7 */
+#define USART_ICR_CTSCF BIT(9) /* F7 */
+#define USART_ICR_RTOCF BIT(11) /* F7 */
+#define USART_ICR_EOBCF BIT(12) /* F7 */
+#define USART_ICR_CMCF BIT(17) /* F7 */
+
+#define STM32_SERIAL_NAME "ttyS"
+#define STM32_MAX_PORTS 6
+
+#define RX_BUF_L 200 /* dma rx buffer length */
+#define RX_BUF_P RX_BUF_L /* dma rx buffer period */
+#define TX_BUF_L 200 /* dma tx buffer length */
+
+struct stm32_port {
+ struct uart_port port;
+ struct clk *clk;
+ struct stm32_usart_info *info;
+ struct dma_chan *rx_ch; /* dma rx channel */
+ dma_addr_t rx_dma_buf; /* dma rx buffer bus address */
+ unsigned char *rx_buf; /* dma rx buffer cpu address */
+ struct dma_chan *tx_ch; /* dma tx channel */
+ dma_addr_t tx_dma_buf; /* dma tx buffer bus address */
+ unsigned char *tx_buf; /* dma tx buffer cpu address */
+ bool tx_dma_busy; /* dma tx busy */
+ bool hw_flow_control;
+};
+
+static struct stm32_port stm32_ports[STM32_MAX_PORTS];
+static struct uart_driver stm32_usart_driver;
diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c
index 512c162..5da7fe4 100644
--- a/drivers/tty/serial/timbuart.c
+++ b/drivers/tty/serial/timbuart.c
@@ -394,7 +394,7 @@
return -EINVAL;
}
-static struct uart_ops timbuart_ops = {
+static const struct uart_ops timbuart_ops = {
.tx_empty = timbuart_tx_empty,
.set_mctrl = timbuart_set_mctrl,
.get_mctrl = timbuart_get_mctrl,
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index 05089b6..817bb0d 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -387,7 +387,7 @@
}
#endif
-static struct uart_ops ulite_ops = {
+static const struct uart_ops ulite_ops = {
.tx_empty = ulite_tx_empty,
.set_mctrl = ulite_set_mctrl,
.get_mctrl = ulite_get_mctrl,
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index 23cfc5e..6b85adc 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -118,7 +118,7 @@
* have been allocated as we can't use pdev->id in
* devicetree
*/
-static unsigned long vt8500_ports_in_use;
+static DECLARE_BITMAP(vt8500_ports_in_use, VT8500_MAX_PORTS);
static inline void vt8500_write(struct uart_port *port, unsigned int val,
unsigned int off)
@@ -663,15 +663,15 @@
if (port < 0) {
/* calculate the port id */
- port = find_first_zero_bit(&vt8500_ports_in_use,
- sizeof(vt8500_ports_in_use));
+ port = find_first_zero_bit(vt8500_ports_in_use,
+ VT8500_MAX_PORTS);
}
if (port >= VT8500_MAX_PORTS)
return -ENODEV;
/* reserve the port id */
- if (test_and_set_bit(port, &vt8500_ports_in_use)) {
+ if (test_and_set_bit(port, vt8500_ports_in_use)) {
/* port already in use - shouldn't really happen */
return -EBUSY;
}
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 9ca1a4d..f37edaa 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -57,7 +57,7 @@
#define CDNS_UART_IMR 0x10 /* Interrupt Mask */
#define CDNS_UART_ISR 0x14 /* Interrupt Status */
#define CDNS_UART_BAUDGEN 0x18 /* Baud Rate Generator */
-#define CDNS_UART_RXTOUT 0x1C /* RX Timeout */
+#define CDNS_UART_RXTOUT 0x1C /* RX Timeout */
#define CDNS_UART_RXWM 0x20 /* RX FIFO Trigger Level */
#define CDNS_UART_MODEMCR 0x24 /* Modem Control */
#define CDNS_UART_MODEMSR 0x28 /* Modem Status */
@@ -68,6 +68,7 @@
#define CDNS_UART_IRRX_PWIDTH 0x3C /* IR Min Received Pulse Width */
#define CDNS_UART_IRTX_PWIDTH 0x40 /* IR Transmitted pulse Width */
#define CDNS_UART_TXWM 0x44 /* TX FIFO Trigger Level */
+#define CDNS_UART_RXBS 0x48 /* RX FIFO byte status register */
/* Control Register Bit Definitions */
#define CDNS_UART_CR_STOPBRK 0x00000100 /* Stop TX break */
@@ -79,6 +80,9 @@
#define CDNS_UART_CR_TXRST 0x00000002 /* TX logic reset */
#define CDNS_UART_CR_RXRST 0x00000001 /* RX logic reset */
#define CDNS_UART_CR_RST_TO 0x00000040 /* Restart Timeout Counter */
+#define CDNS_UART_RXBS_PARITY 0x00000001 /* Parity error status */
+#define CDNS_UART_RXBS_FRAMING 0x00000002 /* Framing error status */
+#define CDNS_UART_RXBS_BRK 0x00000004 /* Overrun error status */
/*
* Mode Register:
@@ -126,13 +130,27 @@
#define CDNS_UART_IXR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt. */
#define CDNS_UART_IXR_MASK 0x00001FFF /* Valid bit mask */
-#define CDNS_UART_RX_IRQS (CDNS_UART_IXR_PARITY | CDNS_UART_IXR_FRAMING | \
- CDNS_UART_IXR_OVERRUN | CDNS_UART_IXR_RXTRIG | \
+ /*
+ * Do not enable parity error interrupt for the following
+ * reason: When parity error interrupt is enabled, each Rx
+ * parity error always results in 2 events. The first one
+ * being parity error interrupt and the second one with a
+ * proper Rx interrupt with the incoming data. Disabling
+ * parity error interrupt ensures better handling of parity
+ * error events. With this change, for a parity error case, we
+ * get a Rx interrupt with parity error set in ISR register
+ * and we still handle parity errors in the desired way.
+ */
+
+#define CDNS_UART_RX_IRQS (CDNS_UART_IXR_FRAMING | \
+ CDNS_UART_IXR_OVERRUN | \
+ CDNS_UART_IXR_RXTRIG | \
CDNS_UART_IXR_TOUT)
/* Goes in read_status_mask for break detection as the HW doesn't do it*/
-#define CDNS_UART_IXR_BRK 0x80000000
+#define CDNS_UART_IXR_BRK 0x00002000
+#define CDNS_UART_RXBS_SUPPORT BIT(1)
/*
* Modem Control register:
* The read/write Modem Control register controls the interface with the modem
@@ -172,46 +190,66 @@
struct clk *pclk;
unsigned int baud;
struct notifier_block clk_rate_change_nb;
+ u32 quirks;
+};
+struct cdns_platform_data {
+ u32 quirks;
};
#define to_cdns_uart(_nb) container_of(_nb, struct cdns_uart, \
clk_rate_change_nb);
-static void cdns_uart_handle_rx(struct uart_port *port, unsigned int isrstatus)
+/**
+ * cdns_uart_handle_rx - Handle the received bytes along with Rx errors.
+ * @dev_id: Id of the UART port
+ * @isrstatus: The interrupt status register value as read
+ * Return: None
+ */
+static void cdns_uart_handle_rx(void *dev_id, unsigned int isrstatus)
{
- /*
- * There is no hardware break detection, so we interpret framing
- * error with all-zeros data as a break sequence. Most of the time,
- * there's another non-zero byte at the end of the sequence.
- */
- if (isrstatus & CDNS_UART_IXR_FRAMING) {
- while (!(readl(port->membase + CDNS_UART_SR) &
- CDNS_UART_SR_RXEMPTY)) {
- if (!readl(port->membase + CDNS_UART_FIFO)) {
+ struct uart_port *port = (struct uart_port *)dev_id;
+ struct cdns_uart *cdns_uart = port->private_data;
+ unsigned int data;
+ unsigned int rxbs_status = 0;
+ unsigned int status_mask;
+ unsigned int framerrprocessed = 0;
+ char status = TTY_NORMAL;
+ bool is_rxbs_support;
+
+ is_rxbs_support = cdns_uart->quirks & CDNS_UART_RXBS_SUPPORT;
+
+ while ((readl(port->membase + CDNS_UART_SR) &
+ CDNS_UART_SR_RXEMPTY) != CDNS_UART_SR_RXEMPTY) {
+ if (is_rxbs_support)
+ rxbs_status = readl(port->membase + CDNS_UART_RXBS);
+ data = readl(port->membase + CDNS_UART_FIFO);
+ port->icount.rx++;
+ /*
+ * There is no hardware break detection in Zynq, so we interpret
+ * framing error with all-zeros data as a break sequence.
+ * Most of the time, there's another non-zero byte at the
+ * end of the sequence.
+ */
+ if (!is_rxbs_support && (isrstatus & CDNS_UART_IXR_FRAMING)) {
+ if (!data) {
port->read_status_mask |= CDNS_UART_IXR_BRK;
- isrstatus &= ~CDNS_UART_IXR_FRAMING;
+ framerrprocessed = 1;
+ continue;
}
}
- writel(CDNS_UART_IXR_FRAMING, port->membase + CDNS_UART_ISR);
- }
+ if (is_rxbs_support && (rxbs_status & CDNS_UART_RXBS_BRK)) {
+ port->icount.brk++;
+ status = TTY_BREAK;
+ if (uart_handle_break(port))
+ continue;
+ }
- /* drop byte with parity error if IGNPAR specified */
- if (isrstatus & port->ignore_status_mask & CDNS_UART_IXR_PARITY)
- isrstatus &= ~(CDNS_UART_IXR_RXTRIG | CDNS_UART_IXR_TOUT);
+ isrstatus &= port->read_status_mask;
+ isrstatus &= ~port->ignore_status_mask;
+ status_mask = port->read_status_mask;
+ status_mask &= ~port->ignore_status_mask;
- isrstatus &= port->read_status_mask;
- isrstatus &= ~port->ignore_status_mask;
-
- if (!(isrstatus & (CDNS_UART_IXR_TOUT | CDNS_UART_IXR_RXTRIG)))
- return;
-
- while (!(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_RXEMPTY)) {
- u32 data;
- char status = TTY_NORMAL;
-
- data = readl(port->membase + CDNS_UART_FIFO);
-
- /* Non-NULL byte after BREAK is garbage (99%) */
- if (data && (port->read_status_mask & CDNS_UART_IXR_BRK)) {
+ if (data &&
+ (port->read_status_mask & CDNS_UART_IXR_BRK)) {
port->read_status_mask &= ~CDNS_UART_IXR_BRK;
port->icount.brk++;
if (uart_handle_break(port))
@@ -221,57 +259,83 @@
if (uart_handle_sysrq_char(port, data))
continue;
- port->icount.rx++;
-
- if (isrstatus & CDNS_UART_IXR_PARITY) {
- port->icount.parity++;
- status = TTY_PARITY;
- } else if (isrstatus & CDNS_UART_IXR_FRAMING) {
- port->icount.frame++;
- status = TTY_FRAME;
- } else if (isrstatus & CDNS_UART_IXR_OVERRUN) {
- port->icount.overrun++;
+ if (is_rxbs_support) {
+ if ((rxbs_status & CDNS_UART_RXBS_PARITY)
+ && (status_mask & CDNS_UART_IXR_PARITY)) {
+ port->icount.parity++;
+ status = TTY_PARITY;
+ }
+ if ((rxbs_status & CDNS_UART_RXBS_FRAMING)
+ && (status_mask & CDNS_UART_IXR_PARITY)) {
+ port->icount.frame++;
+ status = TTY_FRAME;
+ }
+ } else {
+ if (isrstatus & CDNS_UART_IXR_PARITY) {
+ port->icount.parity++;
+ status = TTY_PARITY;
+ }
+ if ((isrstatus & CDNS_UART_IXR_FRAMING) &&
+ !framerrprocessed) {
+ port->icount.frame++;
+ status = TTY_FRAME;
+ }
}
-
- uart_insert_char(port, isrstatus, CDNS_UART_IXR_OVERRUN,
- data, status);
+ if (isrstatus & CDNS_UART_IXR_OVERRUN) {
+ port->icount.overrun++;
+ tty_insert_flip_char(&port->state->port, 0,
+ TTY_OVERRUN);
+ }
+ tty_insert_flip_char(&port->state->port, data, status);
+ isrstatus = 0;
}
+ spin_unlock(&port->lock);
tty_flip_buffer_push(&port->state->port);
+ spin_lock(&port->lock);
}
-static void cdns_uart_handle_tx(struct uart_port *port)
+/**
+ * cdns_uart_handle_tx - Handle the bytes to be Txed.
+ * @dev_id: Id of the UART port
+ * Return: None
+ */
+static void cdns_uart_handle_tx(void *dev_id)
{
+ struct uart_port *port = (struct uart_port *)dev_id;
unsigned int numbytes;
if (uart_circ_empty(&port->state->xmit)) {
writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IDR);
- return;
+ } else {
+ numbytes = port->fifosize;
+ while (numbytes && !uart_circ_empty(&port->state->xmit) &&
+ !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) {
+ /*
+ * Get the data from the UART circular buffer
+ * and write it to the cdns_uart's TX_FIFO
+ * register.
+ */
+ writel(
+ port->state->xmit.buf[port->state->xmit.
+ tail], port->membase + CDNS_UART_FIFO);
+
+ port->icount.tx++;
+
+ /*
+ * Adjust the tail of the UART buffer and wrap
+ * the buffer if it reaches limit.
+ */
+ port->state->xmit.tail =
+ (port->state->xmit.tail + 1) &
+ (UART_XMIT_SIZE - 1);
+
+ numbytes--;
+ }
+
+ if (uart_circ_chars_pending(
+ &port->state->xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
}
-
- numbytes = port->fifosize;
- while (numbytes && !uart_circ_empty(&port->state->xmit) &&
- !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) {
- /*
- * Get the data from the UART circular buffer
- * and write it to the cdns_uart's TX_FIFO
- * register.
- */
- writel(port->state->xmit.buf[port->state->xmit.tail],
- port->membase + CDNS_UART_FIFO);
- port->icount.tx++;
-
- /*
- * Adjust the tail of the UART buffer and wrap
- * the buffer if it reaches limit.
- */
- port->state->xmit.tail =
- (port->state->xmit.tail + 1) & (UART_XMIT_SIZE - 1);
-
- numbytes--;
- }
-
- if (uart_circ_chars_pending(&port->state->xmit) < WAKEUP_CHARS)
- uart_write_wakeup(port);
}
/**
@@ -284,27 +348,24 @@
static irqreturn_t cdns_uart_isr(int irq, void *dev_id)
{
struct uart_port *port = (struct uart_port *)dev_id;
- unsigned long flags;
unsigned int isrstatus;
- spin_lock_irqsave(&port->lock, flags);
+ spin_lock(&port->lock);
/* Read the interrupt status register to determine which
- * interrupt(s) is/are active.
+ * interrupt(s) is/are active and clear them.
*/
isrstatus = readl(port->membase + CDNS_UART_ISR);
-
- if (isrstatus & CDNS_UART_RX_IRQS)
- cdns_uart_handle_rx(port, isrstatus);
-
- if ((isrstatus & CDNS_UART_IXR_TXEMPTY) == CDNS_UART_IXR_TXEMPTY)
- cdns_uart_handle_tx(port);
-
writel(isrstatus, port->membase + CDNS_UART_ISR);
- /* be sure to release the lock and tty before leaving */
- spin_unlock_irqrestore(&port->lock, flags);
+ if (isrstatus & CDNS_UART_IXR_TXEMPTY) {
+ cdns_uart_handle_tx(dev_id);
+ isrstatus &= ~CDNS_UART_IXR_TXEMPTY;
+ }
+ if (isrstatus & CDNS_UART_IXR_MASK)
+ cdns_uart_handle_rx(dev_id, isrstatus);
+ spin_unlock(&port->lock);
return IRQ_HANDLED;
}
@@ -653,6 +714,10 @@
ctrl_reg |= CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST;
writel(ctrl_reg, port->membase + CDNS_UART_CR);
+ while (readl(port->membase + CDNS_UART_CR) &
+ (CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST))
+ cpu_relax();
+
/*
* Clear the RX disable and TX disable bits and then set the TX enable
* bit and RX enable bit to enable the transmitter and receiver.
@@ -736,10 +801,14 @@
*/
static int cdns_uart_startup(struct uart_port *port)
{
+ struct cdns_uart *cdns_uart = port->private_data;
+ bool is_brk_support;
int ret;
unsigned long flags;
unsigned int status = 0;
+ is_brk_support = cdns_uart->quirks & CDNS_UART_RXBS_SUPPORT;
+
spin_lock_irqsave(&port->lock, flags);
/* Disable the TX and RX */
@@ -752,6 +821,10 @@
writel(CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST,
port->membase + CDNS_UART_CR);
+ while (readl(port->membase + CDNS_UART_CR) &
+ (CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST))
+ cpu_relax();
+
/*
* Clear the RX disable bit and then set the RX enable bit to enable
* the receiver.
@@ -794,7 +867,11 @@
}
/* Set the Interrupt Registers with desired interrupts */
- writel(CDNS_UART_RX_IRQS, port->membase + CDNS_UART_IER);
+ if (is_brk_support)
+ writel(CDNS_UART_RX_IRQS | CDNS_UART_IXR_BRK,
+ port->membase + CDNS_UART_IER);
+ else
+ writel(CDNS_UART_RX_IRQS, port->membase + CDNS_UART_IER);
return 0;
}
@@ -993,7 +1070,7 @@
}
}
-static struct uart_ops cdns_uart_ops = {
+static const struct uart_ops cdns_uart_ops = {
.set_mctrl = cdns_uart_set_mctrl,
.get_mctrl = cdns_uart_get_mctrl,
.start_tx = cdns_uart_start_tx,
@@ -1088,9 +1165,34 @@
static int __init cdns_early_console_setup(struct earlycon_device *device,
const char *opt)
{
- if (!device->port.membase)
+ struct uart_port *port = &device->port;
+
+ if (!port->membase)
return -ENODEV;
+ /* initialise control register */
+ writel(CDNS_UART_CR_TX_EN|CDNS_UART_CR_TXRST|CDNS_UART_CR_RXRST,
+ port->membase + CDNS_UART_CR);
+
+ /* only set baud if specified on command line - otherwise
+ * assume it has been initialized by a boot loader.
+ */
+ if (device->baud) {
+ u32 cd = 0, bdiv = 0;
+ u32 mr;
+ int div8;
+
+ cdns_uart_calc_baud_divs(port->uartclk, device->baud,
+ &bdiv, &cd, &div8);
+ mr = CDNS_UART_MR_PARITY_NONE;
+ if (div8)
+ mr |= CDNS_UART_MR_CLKSEL;
+
+ writel(mr, port->membase + CDNS_UART_MR);
+ writel(cd, port->membase + CDNS_UART_BAUDGEN);
+ writel(bdiv, port->membase + CDNS_UART_BAUDDIV);
+ }
+
device->con->write = cdns_early_write;
return 0;
@@ -1328,6 +1430,18 @@
static SIMPLE_DEV_PM_OPS(cdns_uart_dev_pm_ops, cdns_uart_suspend,
cdns_uart_resume);
+static const struct cdns_platform_data zynqmp_uart_def = {
+ .quirks = CDNS_UART_RXBS_SUPPORT, };
+
+/* Match table for of_platform binding */
+static const struct of_device_id cdns_uart_of_match[] = {
+ { .compatible = "xlnx,xuartps", },
+ { .compatible = "cdns,uart-r1p8", },
+ { .compatible = "cdns,uart-r1p12", .data = &zynqmp_uart_def },
+ {}
+};
+MODULE_DEVICE_TABLE(of, cdns_uart_of_match);
+
/**
* cdns_uart_probe - Platform driver probe
* @pdev: Pointer to the platform device structure
@@ -1340,12 +1454,20 @@
struct uart_port *port;
struct resource *res;
struct cdns_uart *cdns_uart_data;
+ const struct of_device_id *match;
cdns_uart_data = devm_kzalloc(&pdev->dev, sizeof(*cdns_uart_data),
GFP_KERNEL);
if (!cdns_uart_data)
return -ENOMEM;
+ match = of_match_node(cdns_uart_of_match, pdev->dev.of_node);
+ if (match && match->data) {
+ const struct cdns_platform_data *data = match->data;
+
+ cdns_uart_data->quirks = data->quirks;
+ }
+
cdns_uart_data->pclk = devm_clk_get(&pdev->dev, "pclk");
if (IS_ERR(cdns_uart_data->pclk)) {
cdns_uart_data->pclk = devm_clk_get(&pdev->dev, "aper_clk");
@@ -1471,14 +1593,6 @@
return rc;
}
-/* Match table for of_platform binding */
-static const struct of_device_id cdns_uart_of_match[] = {
- { .compatible = "xlnx,xuartps", },
- { .compatible = "cdns,uart-r1p8", },
- {}
-};
-MODULE_DEVICE_TABLE(of, cdns_uart_of_match);
-
static struct platform_driver cdns_uart_platform_driver = {
.probe = cdns_uart_probe,
.remove = cdns_uart_remove,
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 2705ca9..e841a4e 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1312,12 +1312,12 @@
if (i > vc->vc_npar)
return i;
- if (vc->vc_par[i] == 5 && i < vc->vc_npar) {
- /* 256 colours -- ubiquitous */
+ if (vc->vc_par[i] == 5 && i + 1 <= vc->vc_npar) {
+ /* 256 colours */
i++;
rgb_from_256(vc->vc_par[i], &c);
- } else if (vc->vc_par[i] == 2 && i <= vc->vc_npar + 3) {
- /* 24 bit -- extremely rare */
+ } else if (vc->vc_par[i] == 2 && i + 3 <= vc->vc_npar) {
+ /* 24 bit */
c.r = vc->vc_par[i + 1];
c.g = vc->vc_par[i + 2];
c.b = vc->vc_par[i + 3];
@@ -1415,6 +1415,11 @@
(vc->vc_color & 0x0f);
break;
default:
+ if (vc->vc_par[i] >= 90 && vc->vc_par[i] <= 107) {
+ if (vc->vc_par[i] < 100)
+ vc->vc_intensity = 2;
+ vc->vc_par[i] -= 60;
+ }
if (vc->vc_par[i] >= 30 && vc->vc_par[i] <= 37)
vc->vc_color = color_table[vc->vc_par[i] - 30]
| (vc->vc_color & 0xf0);
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index 915facb..e1134a4 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -229,7 +229,7 @@
++uiomem;
}
- priv->dmem_region_start = i;
+ priv->dmem_region_start = uiomem - &uioinfo->mem[0];
priv->num_dmem_regions = pdata->num_dynamic_regions;
for (i = 0; i < pdata->num_dynamic_regions; ++i) {
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 97fb2f8..3cc98c0 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -322,18 +322,7 @@
"vhost-test",
&vhost_test_fops,
};
-
-static int vhost_test_init(void)
-{
- return misc_register(&vhost_test_misc);
-}
-module_init(vhost_test_init);
-
-static void vhost_test_exit(void)
-{
- misc_deregister(&vhost_test_misc);
-}
-module_exit(vhost_test_exit);
+module_misc_device(vhost_test_misc);
MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/vme/bridges/Kconfig b/drivers/vme/bridges/Kconfig
index f6d8545..f6ddc37 100644
--- a/drivers/vme/bridges/Kconfig
+++ b/drivers/vme/bridges/Kconfig
@@ -13,3 +13,11 @@
help
If you say Y here you get support for the Tundra TSI148 VME bridge
chip.
+
+config VME_FAKE
+ tristate "Fake"
+ help
+ If you say Y here you get support for the fake VME bridge. This
+ provides a virtualised VME Bus for devices with no VME bridge. This
+ is mainly useful for VME development (in the absence of VME
+ hardware).
diff --git a/drivers/vme/bridges/Makefile b/drivers/vme/bridges/Makefile
index 59638af..b074542 100644
--- a/drivers/vme/bridges/Makefile
+++ b/drivers/vme/bridges/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_VME_CA91CX42) += vme_ca91cx42.o
obj-$(CONFIG_VME_TSI148) += vme_tsi148.o
+obj-$(CONFIG_VME_FAKE) += vme_fake.o
diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index 9f2c834..6b5ee89 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -47,6 +47,8 @@
{ },
};
+MODULE_DEVICE_TABLE(pci, ca91cx42_ids);
+
static struct pci_driver ca91cx42_driver = {
.name = driver_name,
.id_table = ca91cx42_ids,
@@ -69,7 +71,7 @@
for (i = 0; i < 4; i++) {
if (stat & CA91CX42_LINT_LM[i]) {
/* We only enable interrupts if the callback is set */
- bridge->lm_callback[i](i);
+ bridge->lm_callback[i](bridge->lm_data[i]);
serviced |= CA91CX42_LINT_LM[i];
}
}
@@ -1410,7 +1412,7 @@
* Callback will be passed the monitor triggered.
*/
static int ca91cx42_lm_attach(struct vme_lm_resource *lm, int monitor,
- void (*callback)(int))
+ void (*callback)(void *), void *data)
{
u32 lm_ctl, tmp;
struct ca91cx42_driver *bridge;
@@ -1438,6 +1440,7 @@
/* Attach callback */
bridge->lm_callback[monitor] = callback;
+ bridge->lm_data[monitor] = data;
/* Enable Location Monitor interrupt */
tmp = ioread32(bridge->base + LINT_EN);
@@ -1477,6 +1480,7 @@
/* Detach callback */
bridge->lm_callback[monitor] = NULL;
+ bridge->lm_data[monitor] = NULL;
/* If all location monitors disabled, disable global Location Monitor */
if ((tmp & (CA91CX42_LINT_LM0 | CA91CX42_LINT_LM1 | CA91CX42_LINT_LM2 |
diff --git a/drivers/vme/bridges/vme_ca91cx42.h b/drivers/vme/bridges/vme_ca91cx42.h
index d54119e..f35c9f5 100644
--- a/drivers/vme/bridges/vme_ca91cx42.h
+++ b/drivers/vme/bridges/vme_ca91cx42.h
@@ -43,7 +43,8 @@
wait_queue_head_t dma_queue;
wait_queue_head_t iack_queue;
wait_queue_head_t mbox_queue;
- void (*lm_callback[4])(int); /* Called in interrupt handler */
+ void (*lm_callback[4])(void *); /* Called in interrupt handler */
+ void *lm_data[4];
void *crcsr_kernel;
dma_addr_t crcsr_bus;
struct mutex vme_rmw; /* Only one RMW cycle at a time */
diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
new file mode 100644
index 0000000..30b3acc
--- /dev/null
+++ b/drivers/vme/bridges/vme_fake.c
@@ -0,0 +1,1306 @@
+/*
+ * Fake VME bridge support.
+ *
+ * This drive provides a fake VME bridge chip, this enables debugging of the
+ * VME framework in the absence of a VME system.
+ *
+ * This driver has to do a number of things in software that would be driven
+ * by hardware if it was available, it will also result in extra overhead at
+ * times when compared with driving actual hardware.
+ *
+ * Author: Martyn Welch <martyn@welches.me.uk>
+ * Copyright (c) 2014 Martyn Welch
+ *
+ * Based on vme_tsi148.c:
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on work by Tom Armistead and Ajit Prem
+ * Copyright 2004 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/vme.h>
+
+#include "../vme_bridge.h"
+
+/*
+ * Define the number of each that the fake driver supports.
+ */
+#define FAKE_MAX_MASTER 8 /* Max Master Windows */
+#define FAKE_MAX_SLAVE 8 /* Max Slave Windows */
+
+/* Structures to hold information normally held in device registers */
+struct fake_slave_window {
+ int enabled;
+ unsigned long long vme_base;
+ unsigned long long size;
+ void *buf_base;
+ u32 aspace;
+ u32 cycle;
+};
+
+struct fake_master_window {
+ int enabled;
+ unsigned long long vme_base;
+ unsigned long long size;
+ u32 aspace;
+ u32 cycle;
+ u32 dwidth;
+};
+
+/* Structure used to hold driver specific information */
+struct fake_driver {
+ struct vme_bridge *parent;
+ struct fake_slave_window slaves[FAKE_MAX_SLAVE];
+ struct fake_master_window masters[FAKE_MAX_MASTER];
+ u32 lm_enabled;
+ unsigned long long lm_base;
+ u32 lm_aspace;
+ u32 lm_cycle;
+ void (*lm_callback[4])(void *);
+ void *lm_data[4];
+ struct tasklet_struct int_tasklet;
+ int int_level;
+ int int_statid;
+ void *crcsr_kernel;
+ dma_addr_t crcsr_bus;
+ /* Only one VME interrupt can be generated at a time, provide locking */
+ struct mutex vme_int;
+};
+
+/* Module parameter */
+static int geoid;
+
+static const char driver_name[] = "vme_fake";
+
+static struct vme_bridge *exit_pointer;
+
+static struct device *vme_root;
+
+/*
+ * Calling VME bus interrupt callback if provided.
+ */
+static void fake_VIRQ_tasklet(unsigned long data)
+{
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = (struct vme_bridge *) data;
+ bridge = fake_bridge->driver_priv;
+
+ vme_irq_handler(fake_bridge, bridge->int_level, bridge->int_statid);
+}
+
+/*
+ * Configure VME interrupt
+ */
+static void fake_irq_set(struct vme_bridge *fake_bridge, int level,
+ int state, int sync)
+{
+ /* Nothing to do */
+}
+
+static void *fake_pci_to_ptr(dma_addr_t addr)
+{
+ return (void *)(uintptr_t)addr;
+}
+
+static dma_addr_t fake_ptr_to_pci(void *addr)
+{
+ return (dma_addr_t)(uintptr_t)addr;
+}
+
+/*
+ * Generate a VME bus interrupt at the requested level & vector. Wait for
+ * interrupt to be acked.
+ */
+static int fake_irq_generate(struct vme_bridge *fake_bridge, int level,
+ int statid)
+{
+ struct fake_driver *bridge;
+
+ bridge = fake_bridge->driver_priv;
+
+ mutex_lock(&bridge->vme_int);
+
+ bridge->int_level = level;
+
+ bridge->int_statid = statid;
+
+ /*
+ * Schedule tasklet to run VME handler to emulate normal VME interrupt
+ * handler behaviour.
+ */
+ tasklet_schedule(&bridge->int_tasklet);
+
+ mutex_unlock(&bridge->vme_int);
+
+ return 0;
+}
+
+/*
+ * Initialize a slave window with the requested attributes.
+ */
+static int fake_slave_set(struct vme_slave_resource *image, int enabled,
+ unsigned long long vme_base, unsigned long long size,
+ dma_addr_t buf_base, u32 aspace, u32 cycle)
+{
+ unsigned int i, granularity = 0;
+ unsigned long long vme_bound;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = image->parent;
+ bridge = fake_bridge->driver_priv;
+
+ i = image->number;
+
+ switch (aspace) {
+ case VME_A16:
+ granularity = 0x10;
+ break;
+ case VME_A24:
+ granularity = 0x1000;
+ break;
+ case VME_A32:
+ granularity = 0x10000;
+ break;
+ case VME_A64:
+ granularity = 0x10000;
+ break;
+ case VME_CRCSR:
+ case VME_USER1:
+ case VME_USER2:
+ case VME_USER3:
+ case VME_USER4:
+ default:
+ pr_err("Invalid address space\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Bound address is a valid address for the window, adjust
+ * accordingly
+ */
+ vme_bound = vme_base + size - granularity;
+
+ if (vme_base & (granularity - 1)) {
+ pr_err("Invalid VME base alignment\n");
+ return -EINVAL;
+ }
+ if (vme_bound & (granularity - 1)) {
+ pr_err("Invalid VME bound alignment\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&image->mtx);
+
+ bridge->slaves[i].enabled = enabled;
+ bridge->slaves[i].vme_base = vme_base;
+ bridge->slaves[i].size = size;
+ bridge->slaves[i].buf_base = fake_pci_to_ptr(buf_base);
+ bridge->slaves[i].aspace = aspace;
+ bridge->slaves[i].cycle = cycle;
+
+ mutex_unlock(&image->mtx);
+
+ return 0;
+}
+
+/*
+ * Get slave window configuration.
+ */
+static int fake_slave_get(struct vme_slave_resource *image, int *enabled,
+ unsigned long long *vme_base, unsigned long long *size,
+ dma_addr_t *buf_base, u32 *aspace, u32 *cycle)
+{
+ unsigned int i;
+ struct fake_driver *bridge;
+
+ bridge = image->parent->driver_priv;
+
+ i = image->number;
+
+ mutex_lock(&image->mtx);
+
+ *enabled = bridge->slaves[i].enabled;
+ *vme_base = bridge->slaves[i].vme_base;
+ *size = bridge->slaves[i].size;
+ *buf_base = fake_ptr_to_pci(bridge->slaves[i].buf_base);
+ *aspace = bridge->slaves[i].aspace;
+ *cycle = bridge->slaves[i].cycle;
+
+ mutex_unlock(&image->mtx);
+
+ return 0;
+}
+
+/*
+ * Set the attributes of an outbound window.
+ */
+static int fake_master_set(struct vme_master_resource *image, int enabled,
+ unsigned long long vme_base, unsigned long long size,
+ u32 aspace, u32 cycle, u32 dwidth)
+{
+ int retval = 0;
+ unsigned int i;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = image->parent;
+
+ bridge = fake_bridge->driver_priv;
+
+ /* Verify input data */
+ if (vme_base & 0xFFFF) {
+ pr_err("Invalid VME Window alignment\n");
+ retval = -EINVAL;
+ goto err_window;
+ }
+
+ if (size & 0xFFFF) {
+ pr_err("Invalid size alignment\n");
+ retval = -EINVAL;
+ goto err_window;
+ }
+
+ if ((size == 0) && (enabled != 0)) {
+ pr_err("Size must be non-zero for enabled windows\n");
+ retval = -EINVAL;
+ goto err_window;
+ }
+
+ /* Setup data width */
+ switch (dwidth) {
+ case VME_D8:
+ case VME_D16:
+ case VME_D32:
+ break;
+ default:
+ pr_err("Invalid data width\n");
+ retval = -EINVAL;
+ goto err_dwidth;
+ }
+
+ /* Setup address space */
+ switch (aspace) {
+ case VME_A16:
+ case VME_A24:
+ case VME_A32:
+ case VME_A64:
+ case VME_CRCSR:
+ case VME_USER1:
+ case VME_USER2:
+ case VME_USER3:
+ case VME_USER4:
+ break;
+ default:
+ pr_err("Invalid address space\n");
+ retval = -EINVAL;
+ goto err_aspace;
+ }
+
+ spin_lock(&image->lock);
+
+ i = image->number;
+
+ bridge->masters[i].enabled = enabled;
+ bridge->masters[i].vme_base = vme_base;
+ bridge->masters[i].size = size;
+ bridge->masters[i].aspace = aspace;
+ bridge->masters[i].cycle = cycle;
+ bridge->masters[i].dwidth = dwidth;
+
+ spin_unlock(&image->lock);
+
+ return 0;
+
+err_aspace:
+err_dwidth:
+err_window:
+ return retval;
+
+}
+
+/*
+ * Set the attributes of an outbound window.
+ */
+static int __fake_master_get(struct vme_master_resource *image, int *enabled,
+ unsigned long long *vme_base, unsigned long long *size,
+ u32 *aspace, u32 *cycle, u32 *dwidth)
+{
+ unsigned int i;
+ struct fake_driver *bridge;
+
+ bridge = image->parent->driver_priv;
+
+ i = image->number;
+
+ *enabled = bridge->masters[i].enabled;
+ *vme_base = bridge->masters[i].vme_base;
+ *size = bridge->masters[i].size;
+ *aspace = bridge->masters[i].aspace;
+ *cycle = bridge->masters[i].cycle;
+ *dwidth = bridge->masters[i].dwidth;
+
+ return 0;
+}
+
+
+static int fake_master_get(struct vme_master_resource *image, int *enabled,
+ unsigned long long *vme_base, unsigned long long *size,
+ u32 *aspace, u32 *cycle, u32 *dwidth)
+{
+ int retval;
+
+ spin_lock(&image->lock);
+
+ retval = __fake_master_get(image, enabled, vme_base, size, aspace,
+ cycle, dwidth);
+
+ spin_unlock(&image->lock);
+
+ return retval;
+}
+
+
+static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
+ u32 aspace, u32 cycle)
+{
+ struct vme_bridge *fake_bridge;
+ unsigned long long lm_base;
+ u32 lm_aspace, lm_cycle;
+ int i;
+ struct vme_lm_resource *lm;
+ struct list_head *pos = NULL, *n;
+
+ /* Get vme_bridge */
+ fake_bridge = bridge->parent;
+
+ /* Loop through each location monitor resource */
+ list_for_each_safe(pos, n, &fake_bridge->lm_resources) {
+ lm = list_entry(pos, struct vme_lm_resource, list);
+
+ /* If disabled, we're done */
+ if (bridge->lm_enabled == 0)
+ return;
+
+ lm_base = bridge->lm_base;
+ lm_aspace = bridge->lm_aspace;
+ lm_cycle = bridge->lm_cycle;
+
+ /* First make sure that the cycle and address space match */
+ if ((lm_aspace == aspace) && (lm_cycle == cycle)) {
+ for (i = 0; i < lm->monitors; i++) {
+ /* Each location monitor covers 8 bytes */
+ if (((lm_base + (8 * i)) <= addr) &&
+ ((lm_base + (8 * i) + 8) > addr)) {
+ if (bridge->lm_callback[i] != NULL)
+ bridge->lm_callback[i](
+ bridge->lm_data[i]);
+ }
+ }
+ }
+ }
+}
+
+static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
+ u32 aspace, u32 cycle)
+{
+ u8 retval = 0xff;
+ int i;
+ unsigned long long start, end, offset;
+ u8 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ if ((addr >= start) && (addr < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u8 *)(bridge->slaves[i].buf_base + offset);
+ retval = *loc;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+ return retval;
+}
+
+static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
+ u32 aspace, u32 cycle)
+{
+ u16 retval = 0xffff;
+ int i;
+ unsigned long long start, end, offset;
+ u16 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if ((addr >= start) && ((addr + 1) < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u16 *)(bridge->slaves[i].buf_base + offset);
+ retval = *loc;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+ return retval;
+}
+
+static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr,
+ u32 aspace, u32 cycle)
+{
+ u32 retval = 0xffffffff;
+ int i;
+ unsigned long long start, end, offset;
+ u32 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if ((addr >= start) && ((addr + 3) < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u32 *)(bridge->slaves[i].buf_base + offset);
+ retval = *loc;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+ return retval;
+}
+
+static ssize_t fake_master_read(struct vme_master_resource *image, void *buf,
+ size_t count, loff_t offset)
+{
+ int retval;
+ u32 aspace, cycle, dwidth;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *priv;
+ int i;
+ unsigned long long addr;
+ unsigned int done = 0;
+ unsigned int count32;
+
+ fake_bridge = image->parent;
+
+ priv = fake_bridge->driver_priv;
+
+ i = image->number;
+
+ addr = (unsigned long long)priv->masters[i].vme_base + offset;
+ aspace = priv->masters[i].aspace;
+ cycle = priv->masters[i].cycle;
+ dwidth = priv->masters[i].dwidth;
+
+ spin_lock(&image->lock);
+
+ /* The following code handles VME address alignment. We cannot use
+ * memcpy_xxx here because it may cut data transfers in to 8-bit
+ * cycles when D16 or D32 cycles are required on the VME bus.
+ * On the other hand, the bridge itself assures that the maximum data
+ * cycle configured for the transfer is used and splits it
+ * automatically for non-aligned addresses, so we don't want the
+ * overhead of needlessly forcing small transfers for the entire cycle.
+ */
+ if (addr & 0x1) {
+ *(u8 *)buf = fake_vmeread8(priv, addr, aspace, cycle);
+ done += 1;
+ if (done == count)
+ goto out;
+ }
+ if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+ if ((addr + done) & 0x2) {
+ if ((count - done) < 2) {
+ *(u8 *)(buf + done) = fake_vmeread8(priv,
+ addr + done, aspace, cycle);
+ done += 1;
+ goto out;
+ } else {
+ *(u16 *)(buf + done) = fake_vmeread16(priv,
+ addr + done, aspace, cycle);
+ done += 2;
+ }
+ }
+ }
+
+ if (dwidth == VME_D32) {
+ count32 = (count - done) & ~0x3;
+ while (done < count32) {
+ *(u32 *)(buf + done) = fake_vmeread32(priv, addr + done,
+ aspace, cycle);
+ done += 4;
+ }
+ } else if (dwidth == VME_D16) {
+ count32 = (count - done) & ~0x3;
+ while (done < count32) {
+ *(u16 *)(buf + done) = fake_vmeread16(priv, addr + done,
+ aspace, cycle);
+ done += 2;
+ }
+ } else if (dwidth == VME_D8) {
+ count32 = (count - done);
+ while (done < count32) {
+ *(u8 *)(buf + done) = fake_vmeread8(priv, addr + done,
+ aspace, cycle);
+ done += 1;
+ }
+
+ }
+
+ if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+ if ((count - done) & 0x2) {
+ *(u16 *)(buf + done) = fake_vmeread16(priv, addr + done,
+ aspace, cycle);
+ done += 2;
+ }
+ }
+ if ((count - done) & 0x1) {
+ *(u8 *)(buf + done) = fake_vmeread8(priv, addr + done, aspace,
+ cycle);
+ done += 1;
+ }
+
+out:
+ retval = count;
+
+ spin_unlock(&image->lock);
+
+ return retval;
+}
+
+static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
+ unsigned long long addr, u32 aspace, u32 cycle)
+{
+ int i;
+ unsigned long long start, end, offset;
+ u8 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if ((addr >= start) && (addr < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u8 *)((void *)bridge->slaves[i].buf_base + offset);
+ *loc = *buf;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
+ unsigned long long addr, u32 aspace, u32 cycle)
+{
+ int i;
+ unsigned long long start, end, offset;
+ u16 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if ((addr >= start) && ((addr + 1) < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u16 *)((void *)bridge->slaves[i].buf_base + offset);
+ *loc = *buf;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
+ unsigned long long addr, u32 aspace, u32 cycle)
+{
+ int i;
+ unsigned long long start, end, offset;
+ u32 *loc;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ if (aspace != bridge->slaves[i].aspace)
+ continue;
+
+ if (cycle != bridge->slaves[i].cycle)
+ continue;
+
+ start = bridge->slaves[i].vme_base;
+ end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+ if ((addr >= start) && ((addr + 3) < end)) {
+ offset = addr - bridge->slaves[i].vme_base;
+ loc = (u32 *)((void *)bridge->slaves[i].buf_base + offset);
+ *loc = *buf;
+
+ break;
+ }
+ }
+
+ fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static ssize_t fake_master_write(struct vme_master_resource *image, void *buf,
+ size_t count, loff_t offset)
+{
+ int retval = 0;
+ u32 aspace, cycle, dwidth;
+ unsigned long long addr;
+ int i;
+ unsigned int done = 0;
+ unsigned int count32;
+
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = image->parent;
+
+ bridge = fake_bridge->driver_priv;
+
+ i = image->number;
+
+ addr = bridge->masters[i].vme_base + offset;
+ aspace = bridge->masters[i].aspace;
+ cycle = bridge->masters[i].cycle;
+ dwidth = bridge->masters[i].dwidth;
+
+ spin_lock(&image->lock);
+
+ /* Here we apply for the same strategy we do in master_read
+ * function in order to assure the correct cycles.
+ */
+ if (addr & 0x1) {
+ fake_vmewrite8(bridge, (u8 *)buf, addr, aspace, cycle);
+ done += 1;
+ if (done == count)
+ goto out;
+ }
+
+ if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+ if ((addr + done) & 0x2) {
+ if ((count - done) < 2) {
+ fake_vmewrite8(bridge, (u8 *)(buf + done),
+ addr + done, aspace, cycle);
+ done += 1;
+ goto out;
+ } else {
+ fake_vmewrite16(bridge, (u16 *)(buf + done),
+ addr + done, aspace, cycle);
+ done += 2;
+ }
+ }
+ }
+
+ if (dwidth == VME_D32) {
+ count32 = (count - done) & ~0x3;
+ while (done < count32) {
+ fake_vmewrite32(bridge, (u32 *)(buf + done),
+ addr + done, aspace, cycle);
+ done += 4;
+ }
+ } else if (dwidth == VME_D16) {
+ count32 = (count - done) & ~0x3;
+ while (done < count32) {
+ fake_vmewrite16(bridge, (u16 *)(buf + done),
+ addr + done, aspace, cycle);
+ done += 2;
+ }
+ } else if (dwidth == VME_D8) {
+ count32 = (count - done);
+ while (done < count32) {
+ fake_vmewrite8(bridge, (u8 *)(buf + done), addr + done,
+ aspace, cycle);
+ done += 1;
+ }
+
+ }
+
+ if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+ if ((count - done) & 0x2) {
+ fake_vmewrite16(bridge, (u16 *)(buf + done),
+ addr + done, aspace, cycle);
+ done += 2;
+ }
+ }
+
+ if ((count - done) & 0x1) {
+ fake_vmewrite8(bridge, (u8 *)(buf + done), addr + done, aspace,
+ cycle);
+ done += 1;
+ }
+
+out:
+ retval = count;
+
+ spin_unlock(&image->lock);
+
+ return retval;
+}
+
+/*
+ * Perform an RMW cycle on the VME bus.
+ *
+ * Requires a previously configured master window, returns final value.
+ */
+static unsigned int fake_master_rmw(struct vme_master_resource *image,
+ unsigned int mask, unsigned int compare, unsigned int swap,
+ loff_t offset)
+{
+ u32 tmp, base;
+ u32 aspace, cycle;
+ int i;
+ struct fake_driver *bridge;
+
+ bridge = image->parent->driver_priv;
+
+ /* Find the PCI address that maps to the desired VME address */
+ i = image->number;
+
+ base = bridge->masters[i].vme_base;
+ aspace = bridge->masters[i].aspace;
+ cycle = bridge->masters[i].cycle;
+
+ /* Lock image */
+ spin_lock(&image->lock);
+
+ /* Read existing value */
+ tmp = fake_vmeread32(bridge, base + offset, aspace, cycle);
+
+ /* Perform check */
+ if ((tmp && mask) == (compare && mask)) {
+ tmp = tmp | (mask | swap);
+ tmp = tmp & (~mask | swap);
+
+ /* Write back */
+ fake_vmewrite32(bridge, &tmp, base + offset, aspace, cycle);
+ }
+
+ /* Unlock image */
+ spin_unlock(&image->lock);
+
+ return tmp;
+}
+
+/*
+ * All 4 location monitors reside at the same base - this is therefore a
+ * system wide configuration.
+ *
+ * This does not enable the LM monitor - that should be done when the first
+ * callback is attached and disabled when the last callback is removed.
+ */
+static int fake_lm_set(struct vme_lm_resource *lm, unsigned long long lm_base,
+ u32 aspace, u32 cycle)
+{
+ int i;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = lm->parent;
+
+ bridge = fake_bridge->driver_priv;
+
+ mutex_lock(&lm->mtx);
+
+ /* If we already have a callback attached, we can't move it! */
+ for (i = 0; i < lm->monitors; i++) {
+ if (bridge->lm_callback[i] != NULL) {
+ mutex_unlock(&lm->mtx);
+ pr_err("Location monitor callback attached, can't reset\n");
+ return -EBUSY;
+ }
+ }
+
+ switch (aspace) {
+ case VME_A16:
+ case VME_A24:
+ case VME_A32:
+ case VME_A64:
+ break;
+ default:
+ mutex_unlock(&lm->mtx);
+ pr_err("Invalid address space\n");
+ return -EINVAL;
+ }
+
+ bridge->lm_base = lm_base;
+ bridge->lm_aspace = aspace;
+ bridge->lm_cycle = cycle;
+
+ mutex_unlock(&lm->mtx);
+
+ return 0;
+}
+
+/* Get configuration of the callback monitor and return whether it is enabled
+ * or disabled.
+ */
+static int fake_lm_get(struct vme_lm_resource *lm,
+ unsigned long long *lm_base, u32 *aspace, u32 *cycle)
+{
+ struct fake_driver *bridge;
+
+ bridge = lm->parent->driver_priv;
+
+ mutex_lock(&lm->mtx);
+
+ *lm_base = bridge->lm_base;
+ *aspace = bridge->lm_aspace;
+ *cycle = bridge->lm_cycle;
+
+ mutex_unlock(&lm->mtx);
+
+ return bridge->lm_enabled;
+}
+
+/*
+ * Attach a callback to a specific location monitor.
+ *
+ * Callback will be passed the monitor triggered.
+ */
+static int fake_lm_attach(struct vme_lm_resource *lm, int monitor,
+ void (*callback)(void *), void *data)
+{
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = lm->parent;
+
+ bridge = fake_bridge->driver_priv;
+
+ mutex_lock(&lm->mtx);
+
+ /* Ensure that the location monitor is configured - need PGM or DATA */
+ if (bridge->lm_cycle == 0) {
+ mutex_unlock(&lm->mtx);
+ pr_err("Location monitor not properly configured\n");
+ return -EINVAL;
+ }
+
+ /* Check that a callback isn't already attached */
+ if (bridge->lm_callback[monitor] != NULL) {
+ mutex_unlock(&lm->mtx);
+ pr_err("Existing callback attached\n");
+ return -EBUSY;
+ }
+
+ /* Attach callback */
+ bridge->lm_callback[monitor] = callback;
+ bridge->lm_data[monitor] = data;
+
+ /* Ensure that global Location Monitor Enable set */
+ bridge->lm_enabled = 1;
+
+ mutex_unlock(&lm->mtx);
+
+ return 0;
+}
+
+/*
+ * Detach a callback function forn a specific location monitor.
+ */
+static int fake_lm_detach(struct vme_lm_resource *lm, int monitor)
+{
+ u32 tmp;
+ int i;
+ struct fake_driver *bridge;
+
+ bridge = lm->parent->driver_priv;
+
+ mutex_lock(&lm->mtx);
+
+ /* Detach callback */
+ bridge->lm_callback[monitor] = NULL;
+ bridge->lm_data[monitor] = NULL;
+
+ /* If all location monitors disabled, disable global Location Monitor */
+ tmp = 0;
+ for (i = 0; i < lm->monitors; i++) {
+ if (bridge->lm_callback[i] != NULL)
+ tmp = 1;
+ }
+
+ if (tmp == 0)
+ bridge->lm_enabled = 0;
+
+ mutex_unlock(&lm->mtx);
+
+ return 0;
+}
+
+/*
+ * Determine Geographical Addressing
+ */
+static int fake_slot_get(struct vme_bridge *fake_bridge)
+{
+ return geoid;
+}
+
+static void *fake_alloc_consistent(struct device *parent, size_t size,
+ dma_addr_t *dma)
+{
+ void *alloc = kmalloc(size, GFP_KERNEL);
+
+ if (alloc != NULL)
+ *dma = fake_ptr_to_pci(alloc);
+
+ return alloc;
+}
+
+static void fake_free_consistent(struct device *parent, size_t size,
+ void *vaddr, dma_addr_t dma)
+{
+ kfree(vaddr);
+/*
+ dma_free_coherent(parent, size, vaddr, dma);
+*/
+}
+
+/*
+ * Configure CR/CSR space
+ *
+ * Access to the CR/CSR can be configured at power-up. The location of the
+ * CR/CSR registers in the CR/CSR address space is determined by the boards
+ * Geographic address.
+ *
+ * Each board has a 512kB window, with the highest 4kB being used for the
+ * boards registers, this means there is a fix length 508kB window which must
+ * be mapped onto PCI memory.
+ */
+static int fake_crcsr_init(struct vme_bridge *fake_bridge)
+{
+ u32 vstat;
+ struct fake_driver *bridge;
+
+ bridge = fake_bridge->driver_priv;
+
+ /* Allocate mem for CR/CSR image */
+ bridge->crcsr_kernel = kzalloc(VME_CRCSR_BUF_SIZE, GFP_KERNEL);
+ bridge->crcsr_bus = fake_ptr_to_pci(bridge->crcsr_kernel);
+ if (bridge->crcsr_kernel == NULL)
+ return -ENOMEM;
+
+ vstat = fake_slot_get(fake_bridge);
+
+ pr_info("CR/CSR Offset: %d\n", vstat);
+
+ return 0;
+}
+
+static void fake_crcsr_exit(struct vme_bridge *fake_bridge)
+{
+ struct fake_driver *bridge;
+
+ bridge = fake_bridge->driver_priv;
+
+ kfree(bridge->crcsr_kernel);
+}
+
+
+static int __init fake_init(void)
+{
+ int retval, i;
+ struct list_head *pos = NULL, *n;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *fake_device;
+ struct vme_master_resource *master_image;
+ struct vme_slave_resource *slave_image;
+ struct vme_lm_resource *lm;
+
+ /* We need a fake parent device */
+ vme_root = __root_device_register("vme", THIS_MODULE);
+
+ /* If we want to support more than one bridge at some point, we need to
+ * dynamically allocate this so we get one per device.
+ */
+ fake_bridge = kzalloc(sizeof(struct vme_bridge), GFP_KERNEL);
+ if (fake_bridge == NULL) {
+ retval = -ENOMEM;
+ goto err_struct;
+ }
+
+ fake_device = kzalloc(sizeof(struct fake_driver), GFP_KERNEL);
+ if (fake_device == NULL) {
+ retval = -ENOMEM;
+ goto err_driver;
+ }
+
+ fake_bridge->driver_priv = fake_device;
+
+ fake_bridge->parent = vme_root;
+
+ fake_device->parent = fake_bridge;
+
+ /* Initialize wait queues & mutual exclusion flags */
+ mutex_init(&fake_device->vme_int);
+ mutex_init(&fake_bridge->irq_mtx);
+ tasklet_init(&fake_device->int_tasklet, fake_VIRQ_tasklet,
+ (unsigned long) fake_bridge);
+
+ strcpy(fake_bridge->name, driver_name);
+
+ /* Add master windows to list */
+ INIT_LIST_HEAD(&fake_bridge->master_resources);
+ for (i = 0; i < FAKE_MAX_MASTER; i++) {
+ master_image = kmalloc(sizeof(struct vme_master_resource),
+ GFP_KERNEL);
+ if (master_image == NULL) {
+ retval = -ENOMEM;
+ goto err_master;
+ }
+ master_image->parent = fake_bridge;
+ spin_lock_init(&master_image->lock);
+ master_image->locked = 0;
+ master_image->number = i;
+ master_image->address_attr = VME_A16 | VME_A24 | VME_A32 |
+ VME_A64;
+ master_image->cycle_attr = VME_SCT | VME_BLT | VME_MBLT |
+ VME_2eVME | VME_2eSST | VME_2eSSTB | VME_2eSST160 |
+ VME_2eSST267 | VME_2eSST320 | VME_SUPER | VME_USER |
+ VME_PROG | VME_DATA;
+ master_image->width_attr = VME_D16 | VME_D32;
+ memset(&master_image->bus_resource, 0,
+ sizeof(struct resource));
+ master_image->kern_base = NULL;
+ list_add_tail(&master_image->list,
+ &fake_bridge->master_resources);
+ }
+
+ /* Add slave windows to list */
+ INIT_LIST_HEAD(&fake_bridge->slave_resources);
+ for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+ slave_image = kmalloc(sizeof(struct vme_slave_resource),
+ GFP_KERNEL);
+ if (slave_image == NULL) {
+ retval = -ENOMEM;
+ goto err_slave;
+ }
+ slave_image->parent = fake_bridge;
+ mutex_init(&slave_image->mtx);
+ slave_image->locked = 0;
+ slave_image->number = i;
+ slave_image->address_attr = VME_A16 | VME_A24 | VME_A32 |
+ VME_A64 | VME_CRCSR | VME_USER1 | VME_USER2 |
+ VME_USER3 | VME_USER4;
+ slave_image->cycle_attr = VME_SCT | VME_BLT | VME_MBLT |
+ VME_2eVME | VME_2eSST | VME_2eSSTB | VME_2eSST160 |
+ VME_2eSST267 | VME_2eSST320 | VME_SUPER | VME_USER |
+ VME_PROG | VME_DATA;
+ list_add_tail(&slave_image->list,
+ &fake_bridge->slave_resources);
+ }
+
+ /* Add location monitor to list */
+ INIT_LIST_HEAD(&fake_bridge->lm_resources);
+ lm = kmalloc(sizeof(struct vme_lm_resource), GFP_KERNEL);
+ if (lm == NULL) {
+ pr_err("Failed to allocate memory for location monitor resource structure\n");
+ retval = -ENOMEM;
+ goto err_lm;
+ }
+ lm->parent = fake_bridge;
+ mutex_init(&lm->mtx);
+ lm->locked = 0;
+ lm->number = 1;
+ lm->monitors = 4;
+ list_add_tail(&lm->list, &fake_bridge->lm_resources);
+
+ fake_bridge->slave_get = fake_slave_get;
+ fake_bridge->slave_set = fake_slave_set;
+ fake_bridge->master_get = fake_master_get;
+ fake_bridge->master_set = fake_master_set;
+ fake_bridge->master_read = fake_master_read;
+ fake_bridge->master_write = fake_master_write;
+ fake_bridge->master_rmw = fake_master_rmw;
+ fake_bridge->irq_set = fake_irq_set;
+ fake_bridge->irq_generate = fake_irq_generate;
+ fake_bridge->lm_set = fake_lm_set;
+ fake_bridge->lm_get = fake_lm_get;
+ fake_bridge->lm_attach = fake_lm_attach;
+ fake_bridge->lm_detach = fake_lm_detach;
+ fake_bridge->slot_get = fake_slot_get;
+ fake_bridge->alloc_consistent = fake_alloc_consistent;
+ fake_bridge->free_consistent = fake_free_consistent;
+
+ pr_info("Board is%s the VME system controller\n",
+ (geoid == 1) ? "" : " not");
+
+ pr_info("VME geographical address is set to %d\n", geoid);
+
+ retval = fake_crcsr_init(fake_bridge);
+ if (retval) {
+ pr_err("CR/CSR configuration failed.\n");
+ goto err_crcsr;
+ }
+
+ retval = vme_register_bridge(fake_bridge);
+ if (retval != 0) {
+ pr_err("Chip Registration failed.\n");
+ goto err_reg;
+ }
+
+ exit_pointer = fake_bridge;
+
+ return 0;
+
+err_reg:
+ fake_crcsr_exit(fake_bridge);
+err_crcsr:
+err_lm:
+ /* resources are stored in link list */
+ list_for_each_safe(pos, n, &fake_bridge->lm_resources) {
+ lm = list_entry(pos, struct vme_lm_resource, list);
+ list_del(pos);
+ kfree(lm);
+ }
+err_slave:
+ /* resources are stored in link list */
+ list_for_each_safe(pos, n, &fake_bridge->slave_resources) {
+ slave_image = list_entry(pos, struct vme_slave_resource, list);
+ list_del(pos);
+ kfree(slave_image);
+ }
+err_master:
+ /* resources are stored in link list */
+ list_for_each_safe(pos, n, &fake_bridge->master_resources) {
+ master_image = list_entry(pos, struct vme_master_resource,
+ list);
+ list_del(pos);
+ kfree(master_image);
+ }
+
+ kfree(fake_device);
+err_driver:
+ kfree(fake_bridge);
+err_struct:
+ return retval;
+
+}
+
+
+static void __exit fake_exit(void)
+{
+ struct list_head *pos = NULL;
+ struct list_head *tmplist;
+ struct vme_master_resource *master_image;
+ struct vme_slave_resource *slave_image;
+ int i;
+ struct vme_bridge *fake_bridge;
+ struct fake_driver *bridge;
+
+ fake_bridge = exit_pointer;
+
+ bridge = fake_bridge->driver_priv;
+
+ pr_debug("Driver is being unloaded.\n");
+
+ /*
+ * Shutdown all inbound and outbound windows.
+ */
+ for (i = 0; i < FAKE_MAX_MASTER; i++)
+ bridge->masters[i].enabled = 0;
+
+ for (i = 0; i < FAKE_MAX_SLAVE; i++)
+ bridge->slaves[i].enabled = 0;
+
+ /*
+ * Shutdown Location monitor.
+ */
+ bridge->lm_enabled = 0;
+
+ vme_unregister_bridge(fake_bridge);
+
+ fake_crcsr_exit(fake_bridge);
+ /* resources are stored in link list */
+ list_for_each_safe(pos, tmplist, &fake_bridge->slave_resources) {
+ slave_image = list_entry(pos, struct vme_slave_resource, list);
+ list_del(pos);
+ kfree(slave_image);
+ }
+
+ /* resources are stored in link list */
+ list_for_each_safe(pos, tmplist, &fake_bridge->master_resources) {
+ master_image = list_entry(pos, struct vme_master_resource,
+ list);
+ list_del(pos);
+ kfree(master_image);
+ }
+
+ kfree(fake_bridge->driver_priv);
+
+ kfree(fake_bridge);
+
+ root_device_unregister(vme_root);
+}
+
+
+MODULE_PARM_DESC(geoid, "Set geographical addressing");
+module_param(geoid, int, 0);
+
+MODULE_DESCRIPTION("Fake VME bridge driver");
+MODULE_LICENSE("GPL");
+
+module_init(fake_init);
+module_exit(fake_exit);
diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c
index 4bc5d45..fc1b634 100644
--- a/drivers/vme/bridges/vme_tsi148.c
+++ b/drivers/vme/bridges/vme_tsi148.c
@@ -50,6 +50,8 @@
{ },
};
+MODULE_DEVICE_TABLE(pci, tsi148_ids);
+
static struct pci_driver tsi148_driver = {
.name = driver_name,
.id_table = tsi148_ids,
@@ -102,7 +104,7 @@
for (i = 0; i < 4; i++) {
if (stat & TSI148_LCSR_INTS_LMS[i]) {
/* We only enable interrupts if the callback is set */
- bridge->lm_callback[i](i);
+ bridge->lm_callback[i](bridge->lm_data[i]);
serviced |= TSI148_LCSR_INTC_LMC[i];
}
}
@@ -2047,7 +2049,7 @@
* Callback will be passed the monitor triggered.
*/
static int tsi148_lm_attach(struct vme_lm_resource *lm, int monitor,
- void (*callback)(int))
+ void (*callback)(void *), void *data)
{
u32 lm_ctl, tmp;
struct vme_bridge *tsi148_bridge;
@@ -2077,6 +2079,7 @@
/* Attach callback */
bridge->lm_callback[monitor] = callback;
+ bridge->lm_data[monitor] = data;
/* Enable Location Monitor interrupt */
tmp = ioread32be(bridge->base + TSI148_LCSR_INTEN);
@@ -2124,6 +2127,7 @@
/* Detach callback */
bridge->lm_callback[monitor] = NULL;
+ bridge->lm_data[monitor] = NULL;
/* If all location monitors disabled, disable global Location Monitor */
if ((lm_en & (TSI148_LCSR_INTS_LM0S | TSI148_LCSR_INTS_LM1S |
diff --git a/drivers/vme/bridges/vme_tsi148.h b/drivers/vme/bridges/vme_tsi148.h
index f5ed1438..0935d85d 100644
--- a/drivers/vme/bridges/vme_tsi148.h
+++ b/drivers/vme/bridges/vme_tsi148.h
@@ -38,7 +38,8 @@
void __iomem *base; /* Base Address of device registers */
wait_queue_head_t dma_queue[2];
wait_queue_head_t iack_queue;
- void (*lm_callback[4])(int); /* Called in interrupt handler */
+ void (*lm_callback[4])(void *); /* Called in interrupt handler */
+ void *lm_data[4];
void *crcsr_kernel;
dma_addr_t crcsr_bus;
struct vme_master_resource *flush_image;
diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c
index 37ac0a5..15b6407 100644
--- a/drivers/vme/vme.c
+++ b/drivers/vme/vme.c
@@ -13,8 +13,8 @@
* option) any later version.
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -39,7 +39,6 @@
static LIST_HEAD(vme_bus_list);
static DEFINE_MUTEX(vme_buses_lock);
-static void __exit vme_exit(void);
static int __init vme_init(void);
static struct vme_dev *dev_to_vme_dev(struct device *dev)
@@ -1321,7 +1320,7 @@
EXPORT_SYMBOL(vme_lm_get);
int vme_lm_attach(struct vme_resource *resource, int monitor,
- void (*callback)(int))
+ void (*callback)(void *), void *data)
{
struct vme_bridge *bridge = find_bridge(resource);
struct vme_lm_resource *lm;
@@ -1338,7 +1337,7 @@
return -EINVAL;
}
- return bridge->lm_attach(lm, monitor, callback);
+ return bridge->lm_attach(lm, monitor, callback, data);
}
EXPORT_SYMBOL(vme_lm_attach);
@@ -1622,25 +1621,10 @@
return retval;
}
-static int vme_bus_remove(struct device *dev)
-{
- int retval = -ENODEV;
- struct vme_driver *driver;
- struct vme_dev *vdev = dev_to_vme_dev(dev);
-
- driver = dev->platform_data;
-
- if (driver->remove != NULL)
- retval = driver->remove(vdev);
-
- return retval;
-}
-
struct bus_type vme_bus_type = {
.name = "vme",
.match = vme_bus_match,
.probe = vme_bus_probe,
- .remove = vme_bus_remove,
};
EXPORT_SYMBOL(vme_bus_type);
@@ -1648,11 +1632,4 @@
{
return bus_register(&vme_bus_type);
}
-
-static void __exit vme_exit(void)
-{
- bus_unregister(&vme_bus_type);
-}
-
subsys_initcall(vme_init);
-module_exit(vme_exit);
diff --git a/drivers/vme/vme_bridge.h b/drivers/vme/vme_bridge.h
index cb8246f..2662e91 100644
--- a/drivers/vme/vme_bridge.h
+++ b/drivers/vme/vme_bridge.h
@@ -160,7 +160,8 @@
int (*lm_set) (struct vme_lm_resource *, unsigned long long, u32, u32);
int (*lm_get) (struct vme_lm_resource *, unsigned long long *, u32 *,
u32 *);
- int (*lm_attach) (struct vme_lm_resource *, int, void (*callback)(int));
+ int (*lm_attach)(struct vme_lm_resource *, int,
+ void (*callback)(void *), void *);
int (*lm_detach) (struct vme_lm_resource *, int);
/* CR/CSR space functions */
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 581a300..82611f1 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -66,7 +66,7 @@
/* return the address of the refcnt in the family data */
#define THERM_REFCNT(family_data) \
- (&((struct w1_therm_family_data*)family_data)->refcnt)
+ (&((struct w1_therm_family_data *)family_data)->refcnt)
static int w1_therm_add_slave(struct w1_slave *sl)
{
@@ -81,7 +81,8 @@
static void w1_therm_remove_slave(struct w1_slave *sl)
{
int refcnt = atomic_sub_return(1, THERM_REFCNT(sl->family_data));
- while(refcnt) {
+
+ while (refcnt) {
msleep(1000);
refcnt = atomic_read(THERM_REFCNT(sl->family_data));
}
@@ -151,8 +152,7 @@
.fops = &w1_therm_fops,
};
-struct w1_therm_family_converter
-{
+struct w1_therm_family_converter {
u8 broken;
u16 reserved;
struct w1_family *f;
@@ -293,7 +293,7 @@
uint8_t precision_bits;
uint8_t mask = 0x60;
- if(val > 12 || val < 9) {
+ if (val > 12 || val < 9) {
pr_warn("Unsupported precision\n");
return -1;
}
@@ -336,7 +336,8 @@
/* read values to only alter precision bits */
w1_write_8(dev, W1_READ_SCRATCHPAD);
- if ((count = w1_read_block(dev, rom, 9)) != 9)
+ count = w1_read_block(dev, rom, 9);
+ if (count != 9)
dev_warn(device, "w1_read_block() returned %u instead of 9.\n", count);
crc = w1_calc_crc8(rom, 8);
@@ -366,6 +367,7 @@
static inline int w1_DS18B20_convert_temp(u8 rom[9])
{
s16 t = le16_to_cpup((__le16 *)rom);
+
return t*1000/16;
}
@@ -415,7 +417,7 @@
for (i = 0; i < ARRAY_SIZE(w1_therm_families); ++i) {
if (w1_therm_families[i].f->fid == sl->family->fid) {
/* zero value indicates to write current configuration to eeprom */
- if (0 == val)
+ if (val == 0)
ret = w1_therm_families[i].eeprom(device);
else
ret = w1_therm_families[i].precision(device, val);
@@ -439,8 +441,7 @@
if (ret != 0)
goto post_unlock;
- if(!sl->family_data)
- {
+ if (!sl->family_data) {
ret = -ENODEV;
goto pre_unlock;
}
@@ -495,7 +496,8 @@
if (!w1_reset_select_slave(sl)) {
w1_write_8(dev, W1_READ_SCRATCHPAD);
- if ((count = w1_read_block(dev, rom, 9)) != 9) {
+ count = w1_read_block(dev, rom, 9);
+ if (count != 9) {
dev_warn(device, "w1_read_block() "
"returned %u instead of 9.\n",
count);
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index bb34362..e213c67 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -53,8 +53,8 @@
module_param_named(timeout, w1_timeout, int, 0);
MODULE_PARM_DESC(timeout, "time in seconds between automatic slave searches");
module_param_named(timeout_us, w1_timeout_us, int, 0);
-MODULE_PARM_DESC(timeout, "time in microseconds between automatic slave"
- " searches");
+MODULE_PARM_DESC(timeout_us,
+ "time in microseconds between automatic slave searches");
/* A search stops when w1_max_slave_count devices have been found in that
* search. The next search will start over and detect the same set of devices
* on a static 1-wire bus. Memory is not allocated based on this number, just
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1bffe00..50dbaa8 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -152,6 +152,19 @@
This driver can be built as a module. The module name is tangox_wdt.
+config WDAT_WDT
+ tristate "ACPI Watchdog Action Table (WDAT)"
+ depends on ACPI
+ select ACPI_WATCHDOG
+ help
+ This driver adds support for systems with ACPI Watchdog Action
+ Table (WDAT) table. Servers typically have this but it can be
+ found on some desktop machines as well. This driver will take
+ over the native iTCO watchdog driver found on many Intel CPUs.
+
+ To compile this driver as module, choose M here: the module will
+ be called wdat_wdt.
+
config WM831X_WATCHDOG
tristate "WM831x watchdog"
depends on MFD_WM831X
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index c22ad3e..cba0043 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -202,6 +202,7 @@
obj-$(CONFIG_DA9063_WATCHDOG) += da9063_wdt.o
obj-$(CONFIG_GPIO_WATCHDOG) += gpio_wdt.o
obj-$(CONFIG_TANGOX_WATCHDOG) += tangox_wdt.o
+obj-$(CONFIG_WDAT_WDT) += wdat_wdt.o
obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o
obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
obj-$(CONFIG_MAX63XX_WATCHDOG) += max63xx_wdt.o
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
new file mode 100644
index 0000000..e473e3b
--- /dev/null
+++ b/drivers/watchdog/wdat_wdt.c
@@ -0,0 +1,526 @@
+/*
+ * ACPI Hardware Watchdog (WDAT) driver.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/watchdog.h>
+
+#define MAX_WDAT_ACTIONS ACPI_WDAT_ACTION_RESERVED
+
+/**
+ * struct wdat_instruction - Single ACPI WDAT instruction
+ * @entry: Copy of the ACPI table instruction
+ * @reg: Register the instruction is accessing
+ * @node: Next instruction in action sequence
+ */
+struct wdat_instruction {
+ struct acpi_wdat_entry entry;
+ void __iomem *reg;
+ struct list_head node;
+};
+
+/**
+ * struct wdat_wdt - ACPI WDAT watchdog device
+ * @pdev: Parent platform device
+ * @wdd: Watchdog core device
+ * @period: How long is one watchdog period in ms
+ * @stopped_in_sleep: Is this watchdog stopped by the firmware in S1-S5
+ * @stopped: Was the watchdog stopped by the driver in suspend
+ * @actions: An array of instruction lists indexed by an action number from
+ * the WDAT table. There can be %NULL entries for not implemented
+ * actions.
+ */
+struct wdat_wdt {
+ struct platform_device *pdev;
+ struct watchdog_device wdd;
+ unsigned int period;
+ bool stopped_in_sleep;
+ bool stopped;
+ struct list_head *instructions[MAX_WDAT_ACTIONS];
+};
+
+#define to_wdat_wdt(wdd) container_of(wdd, struct wdat_wdt, wdd)
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int wdat_wdt_read(struct wdat_wdt *wdat,
+ const struct wdat_instruction *instr, u32 *value)
+{
+ const struct acpi_generic_address *gas = &instr->entry.register_region;
+
+ switch (gas->access_width) {
+ case 1:
+ *value = ioread8(instr->reg);
+ break;
+ case 2:
+ *value = ioread16(instr->reg);
+ break;
+ case 3:
+ *value = ioread32(instr->reg);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dev_dbg(&wdat->pdev->dev, "Read %#x from 0x%08llx\n", *value,
+ gas->address);
+
+ return 0;
+}
+
+static int wdat_wdt_write(struct wdat_wdt *wdat,
+ const struct wdat_instruction *instr, u32 value)
+{
+ const struct acpi_generic_address *gas = &instr->entry.register_region;
+
+ switch (gas->access_width) {
+ case 1:
+ iowrite8((u8)value, instr->reg);
+ break;
+ case 2:
+ iowrite16((u16)value, instr->reg);
+ break;
+ case 3:
+ iowrite32(value, instr->reg);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dev_dbg(&wdat->pdev->dev, "Wrote %#x to 0x%08llx\n", value,
+ gas->address);
+
+ return 0;
+}
+
+static int wdat_wdt_run_action(struct wdat_wdt *wdat, unsigned int action,
+ u32 param, u32 *retval)
+{
+ struct wdat_instruction *instr;
+
+ if (action >= ARRAY_SIZE(wdat->instructions))
+ return -EINVAL;
+
+ if (!wdat->instructions[action])
+ return -EOPNOTSUPP;
+
+ dev_dbg(&wdat->pdev->dev, "Running action %#x\n", action);
+
+ /* Run each instruction sequentially */
+ list_for_each_entry(instr, wdat->instructions[action], node) {
+ const struct acpi_wdat_entry *entry = &instr->entry;
+ const struct acpi_generic_address *gas;
+ u32 flags, value, mask, x, y;
+ bool preserve;
+ int ret;
+
+ gas = &entry->register_region;
+
+ preserve = entry->instruction & ACPI_WDAT_PRESERVE_REGISTER;
+ flags = entry->instruction & ~ACPI_WDAT_PRESERVE_REGISTER;
+ value = entry->value;
+ mask = entry->mask;
+
+ switch (flags) {
+ case ACPI_WDAT_READ_VALUE:
+ ret = wdat_wdt_read(wdat, instr, &x);
+ if (ret)
+ return ret;
+ x >>= gas->bit_offset;
+ x &= mask;
+ if (retval)
+ *retval = x == value;
+ break;
+
+ case ACPI_WDAT_READ_COUNTDOWN:
+ ret = wdat_wdt_read(wdat, instr, &x);
+ if (ret)
+ return ret;
+ x >>= gas->bit_offset;
+ x &= mask;
+ if (retval)
+ *retval = x;
+ break;
+
+ case ACPI_WDAT_WRITE_VALUE:
+ x = value & mask;
+ x <<= gas->bit_offset;
+ if (preserve) {
+ ret = wdat_wdt_read(wdat, instr, &y);
+ if (ret)
+ return ret;
+ y = y & ~(mask << gas->bit_offset);
+ x |= y;
+ }
+ ret = wdat_wdt_write(wdat, instr, x);
+ if (ret)
+ return ret;
+ break;
+
+ case ACPI_WDAT_WRITE_COUNTDOWN:
+ x = param;
+ x &= mask;
+ x <<= gas->bit_offset;
+ if (preserve) {
+ ret = wdat_wdt_read(wdat, instr, &y);
+ if (ret)
+ return ret;
+ y = y & ~(mask << gas->bit_offset);
+ x |= y;
+ }
+ ret = wdat_wdt_write(wdat, instr, x);
+ if (ret)
+ return ret;
+ break;
+
+ default:
+ dev_err(&wdat->pdev->dev, "Unknown instruction: %u\n",
+ flags);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int wdat_wdt_enable_reboot(struct wdat_wdt *wdat)
+{
+ int ret;
+
+ /*
+ * WDAT specification says that the watchdog is required to reboot
+ * the system when it fires. However, it also states that it is
+ * recommeded to make it configurable through hardware register. We
+ * enable reboot now if it is configrable, just in case.
+ */
+ ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_REBOOT, 0, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ dev_err(&wdat->pdev->dev,
+ "Failed to enable reboot when watchdog triggers\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void wdat_wdt_boot_status(struct wdat_wdt *wdat)
+{
+ u32 boot_status = 0;
+ int ret;
+
+ ret = wdat_wdt_run_action(wdat, ACPI_WDAT_GET_STATUS, 0, &boot_status);
+ if (ret && ret != -EOPNOTSUPP) {
+ dev_err(&wdat->pdev->dev, "Failed to read boot status\n");
+ return;
+ }
+
+ if (boot_status)
+ wdat->wdd.bootstatus = WDIOF_CARDRESET;
+
+ /* Clear the boot status in case BIOS did not do it */
+ ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_STATUS, 0, NULL);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_err(&wdat->pdev->dev, "Failed to clear boot status\n");
+}
+
+static void wdat_wdt_set_running(struct wdat_wdt *wdat)
+{
+ u32 running = 0;
+ int ret;
+
+ ret = wdat_wdt_run_action(wdat, ACPI_WDAT_GET_RUNNING_STATE, 0,
+ &running);
+ if (ret && ret != -EOPNOTSUPP)
+ dev_err(&wdat->pdev->dev, "Failed to read running state\n");
+
+ if (running)
+ set_bit(WDOG_HW_RUNNING, &wdat->wdd.status);
+}
+
+static int wdat_wdt_start(struct watchdog_device *wdd)
+{
+ return wdat_wdt_run_action(to_wdat_wdt(wdd),
+ ACPI_WDAT_SET_RUNNING_STATE, 0, NULL);
+}
+
+static int wdat_wdt_stop(struct watchdog_device *wdd)
+{
+ return wdat_wdt_run_action(to_wdat_wdt(wdd),
+ ACPI_WDAT_SET_STOPPED_STATE, 0, NULL);
+}
+
+static int wdat_wdt_ping(struct watchdog_device *wdd)
+{
+ return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, 0, NULL);
+}
+
+static int wdat_wdt_set_timeout(struct watchdog_device *wdd,
+ unsigned int timeout)
+{
+ struct wdat_wdt *wdat = to_wdat_wdt(wdd);
+ unsigned int periods;
+ int ret;
+
+ periods = timeout * 1000 / wdat->period;
+ ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_COUNTDOWN, periods, NULL);
+ if (!ret)
+ wdd->timeout = timeout;
+ return ret;
+}
+
+static unsigned int wdat_wdt_get_timeleft(struct watchdog_device *wdd)
+{
+ struct wdat_wdt *wdat = to_wdat_wdt(wdd);
+ u32 periods = 0;
+
+ wdat_wdt_run_action(wdat, ACPI_WDAT_GET_COUNTDOWN, 0, &periods);
+ return periods * wdat->period / 1000;
+}
+
+static const struct watchdog_info wdat_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+ .firmware_version = 0,
+ .identity = "wdat_wdt",
+};
+
+static const struct watchdog_ops wdat_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = wdat_wdt_start,
+ .stop = wdat_wdt_stop,
+ .ping = wdat_wdt_ping,
+ .set_timeout = wdat_wdt_set_timeout,
+ .get_timeleft = wdat_wdt_get_timeleft,
+};
+
+static int wdat_wdt_probe(struct platform_device *pdev)
+{
+ const struct acpi_wdat_entry *entries;
+ const struct acpi_table_wdat *tbl;
+ struct wdat_wdt *wdat;
+ struct resource *res;
+ void __iomem **regs;
+ acpi_status status;
+ int i, ret;
+
+ status = acpi_get_table(ACPI_SIG_WDAT, 0,
+ (struct acpi_table_header **)&tbl);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ wdat = devm_kzalloc(&pdev->dev, sizeof(*wdat), GFP_KERNEL);
+ if (!wdat)
+ return -ENOMEM;
+
+ regs = devm_kcalloc(&pdev->dev, pdev->num_resources, sizeof(*regs),
+ GFP_KERNEL);
+ if (!regs)
+ return -ENOMEM;
+
+ /* WDAT specification wants to have >= 1ms period */
+ if (tbl->timer_period < 1)
+ return -EINVAL;
+ if (tbl->min_count > tbl->max_count)
+ return -EINVAL;
+
+ wdat->period = tbl->timer_period;
+ wdat->wdd.min_hw_heartbeat_ms = wdat->period * tbl->min_count;
+ wdat->wdd.max_hw_heartbeat_ms = wdat->period * tbl->max_count;
+ wdat->stopped_in_sleep = tbl->flags & ACPI_WDAT_STOPPED;
+ wdat->wdd.info = &wdat_wdt_info;
+ wdat->wdd.ops = &wdat_wdt_ops;
+ wdat->pdev = pdev;
+
+ /* Request and map all resources */
+ for (i = 0; i < pdev->num_resources; i++) {
+ void __iomem *reg;
+
+ res = &pdev->resource[i];
+ if (resource_type(res) == IORESOURCE_MEM) {
+ reg = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(reg))
+ return PTR_ERR(reg);
+ } else if (resource_type(res) == IORESOURCE_IO) {
+ reg = devm_ioport_map(&pdev->dev, res->start, 1);
+ if (!reg)
+ return -ENOMEM;
+ } else {
+ dev_err(&pdev->dev, "Unsupported resource\n");
+ return -EINVAL;
+ }
+
+ regs[i] = reg;
+ }
+
+ entries = (struct acpi_wdat_entry *)(tbl + 1);
+ for (i = 0; i < tbl->entries; i++) {
+ const struct acpi_generic_address *gas;
+ struct wdat_instruction *instr;
+ struct list_head *instructions;
+ unsigned int action;
+ struct resource r;
+ int j;
+
+ action = entries[i].action;
+ if (action >= MAX_WDAT_ACTIONS) {
+ dev_dbg(&pdev->dev, "Skipping unknown action: %u\n",
+ action);
+ continue;
+ }
+
+ instr = devm_kzalloc(&pdev->dev, sizeof(*instr), GFP_KERNEL);
+ if (!instr)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&instr->node);
+ instr->entry = entries[i];
+
+ gas = &entries[i].register_region;
+
+ memset(&r, 0, sizeof(r));
+ r.start = gas->address;
+ r.end = r.start + gas->access_width;
+ if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ r.flags = IORESOURCE_MEM;
+ } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+ r.flags = IORESOURCE_IO;
+ } else {
+ dev_dbg(&pdev->dev, "Unsupported address space: %d\n",
+ gas->space_id);
+ continue;
+ }
+
+ /* Find the matching resource */
+ for (j = 0; j < pdev->num_resources; j++) {
+ res = &pdev->resource[j];
+ if (resource_contains(res, &r)) {
+ instr->reg = regs[j] + r.start - res->start;
+ break;
+ }
+ }
+
+ if (!instr->reg) {
+ dev_err(&pdev->dev, "I/O resource not found\n");
+ return -EINVAL;
+ }
+
+ instructions = wdat->instructions[action];
+ if (!instructions) {
+ instructions = devm_kzalloc(&pdev->dev,
+ sizeof(*instructions), GFP_KERNEL);
+ if (!instructions)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(instructions);
+ wdat->instructions[action] = instructions;
+ }
+
+ list_add_tail(&instr->node, instructions);
+ }
+
+ wdat_wdt_boot_status(wdat);
+ wdat_wdt_set_running(wdat);
+
+ ret = wdat_wdt_enable_reboot(wdat);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, wdat);
+
+ watchdog_set_nowayout(&wdat->wdd, nowayout);
+ return devm_watchdog_register_device(&pdev->dev, &wdat->wdd);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int wdat_wdt_suspend_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct wdat_wdt *wdat = platform_get_drvdata(pdev);
+ int ret;
+
+ if (!watchdog_active(&wdat->wdd))
+ return 0;
+
+ /*
+ * We need to stop the watchdog if firmare is not doing it or if we
+ * are going suspend to idle (where firmware is not involved). If
+ * firmware is stopping the watchdog we kick it here one more time
+ * to give it some time.
+ */
+ wdat->stopped = false;
+ if (acpi_target_system_state() == ACPI_STATE_S0 ||
+ !wdat->stopped_in_sleep) {
+ ret = wdat_wdt_stop(&wdat->wdd);
+ if (!ret)
+ wdat->stopped = true;
+ } else {
+ ret = wdat_wdt_ping(&wdat->wdd);
+ }
+
+ return ret;
+}
+
+static int wdat_wdt_resume_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct wdat_wdt *wdat = platform_get_drvdata(pdev);
+ int ret;
+
+ if (!watchdog_active(&wdat->wdd))
+ return 0;
+
+ if (!wdat->stopped) {
+ /*
+ * Looks like the boot firmware reinitializes the watchdog
+ * before it hands off to the OS on resume from sleep so we
+ * stop and reprogram the watchdog here.
+ */
+ ret = wdat_wdt_stop(&wdat->wdd);
+ if (ret)
+ return ret;
+
+ ret = wdat_wdt_set_timeout(&wdat->wdd, wdat->wdd.timeout);
+ if (ret)
+ return ret;
+
+ ret = wdat_wdt_enable_reboot(wdat);
+ if (ret)
+ return ret;
+ }
+
+ return wdat_wdt_start(&wdat->wdd);
+}
+#endif
+
+static const struct dev_pm_ops wdat_wdt_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(wdat_wdt_suspend_noirq,
+ wdat_wdt_resume_noirq)
+};
+
+static struct platform_driver wdat_wdt_driver = {
+ .probe = wdat_wdt_probe,
+ .driver = {
+ .name = "wdat_wdt",
+ .pm = &wdat_wdt_pm_ops,
+ },
+};
+
+module_platform_driver(wdat_wdt_driver);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
+MODULE_DESCRIPTION("ACPI Hardware Watchdog (WDAT) driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:wdat_wdt");
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad7..3ef62ba 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -79,6 +79,7 @@
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
+ select PERCPU_RWSEM
help
This option enables standard file locking support, required
for filesystems like NFS and for the flock() system
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909..d8e6d42 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@
}
return NULL;
}
+
/*
* Find an eligible tree to time-out
* A tree is eligible if :-
@@ -432,6 +433,7 @@
struct dentry *root = sb->s_root;
struct dentry *dentry;
struct dentry *expired;
+ struct dentry *found;
struct autofs_info *ino;
if (!root)
@@ -442,31 +444,46 @@
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
+ int flags = how;
+
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
- expired = NULL;
- else
- expired = should_expire(dentry, mnt, timeout, how);
- if (!expired) {
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
spin_unlock(&sbi->fs_lock);
continue;
}
+ spin_unlock(&sbi->fs_lock);
+
+ expired = should_expire(dentry, mnt, timeout, flags);
+ if (!expired)
+ continue;
+
+ spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(expired);
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- spin_lock(&sbi->fs_lock);
- if (should_expire(expired, mnt, timeout, how)) {
- if (expired != dentry)
- dput(dentry);
- goto found;
- }
+ /* Make sure a reference is not taken on found if
+ * things have changed.
+ */
+ flags &= ~AUTOFS_EXP_LEAVES;
+ found = should_expire(expired, mnt, timeout, how);
+ if (!found || found != expired)
+ /* Something has changed, continue */
+ goto next;
+
+ if (expired != dentry)
+ dput(dentry);
+
+ spin_lock(&sbi->fs_lock);
+ goto found;
+next:
+ spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+ spin_unlock(&sbi->fs_lock);
if (expired != dentry)
dput(expired);
- spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -483,6 +500,7 @@
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
+ int state;
/* Block on any pending expire */
if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@
if (rcu_walk)
return -ECHILD;
+retry:
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
+ state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+ if (state == AUTOFS_INF_WANT_EXPIRE) {
+ spin_unlock(&sbi->fs_lock);
+ /*
+ * Possibly being selected for expire, wait until
+ * it's selected or not.
+ */
+ schedule_timeout_uninterruptible(HZ/10);
+ goto retry;
+ }
+ if (state & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e5495f3..2472af2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1624,20 +1624,12 @@
regset->writeback(task, regset, 1);
}
-#ifndef PR_REG_SIZE
-#define PR_REG_SIZE(S) sizeof(S)
-#endif
-
#ifndef PRSTATUS_SIZE
-#define PRSTATUS_SIZE(S) sizeof(S)
-#endif
-
-#ifndef PR_REG_PTR
-#define PR_REG_PTR(S) (&((S)->pr_reg))
+#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif
#ifndef SET_PR_FPVALID
-#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
static int fill_thread_core_info(struct elf_thread_core_info *t,
@@ -1645,6 +1637,7 @@
long signr, size_t *total)
{
unsigned int i;
+ unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
/*
* NT_PRSTATUS is the one special case, because the regset data
@@ -1653,12 +1646,11 @@
* We assume that regset 0 is NT_PRSTATUS.
*/
fill_prstatus(&t->prstatus, t->task, signr);
- (void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, PR_REG_SIZE(t->prstatus.pr_reg),
- PR_REG_PTR(&t->prstatus), NULL);
+ (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+ &t->prstatus.pr_reg, NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- PRSTATUS_SIZE(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1688,7 +1680,8 @@
regset->core_note_type,
size, data);
else {
- SET_PR_FPVALID(&t->prstatus, 1);
+ SET_PR_FPVALID(&t->prstatus,
+ 1, regset_size);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38c2df8..665da8f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4271,13 +4271,10 @@
if (ret < 0)
return ret;
- /*
- * Use new btrfs_qgroup_reserve_data to reserve precious data space
- *
- * TODO: Find a good method to avoid reserve data space for NOCOW
- * range, but don't impact performance on quota disable case.
- */
+ /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, start, len);
+ if (ret)
+ btrfs_free_reserved_data_space_noquota(inode, start, len);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b2a2da5..7fd939b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1634,6 +1634,9 @@
int namelen;
int ret = 0;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
ret = mnt_want_write_file(file);
if (ret)
goto out;
@@ -1691,6 +1694,9 @@
struct btrfs_ioctl_vol_args *vol_args;
int ret;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1714,6 +1720,9 @@
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -2357,6 +2366,9 @@
int ret;
int err = 0;
+ if (!S_ISDIR(dir->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index c30cf49..2c6312d 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -333,6 +333,7 @@
if (bin_attr->cb_max_size &&
*ppos + count > bin_attr->cb_max_size) {
len = -EFBIG;
+ goto out;
}
tbuf = vmalloc(*ppos + count);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 592059f..354e2ab 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -97,9 +97,6 @@
#define F_DENTRY(filp) ((filp)->f_path.dentry)
-#define REAL_FOPS_DEREF(dentry) \
- ((const struct file_operations *)(dentry)->d_fsdata)
-
static int open_proxy_open(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
@@ -112,7 +109,7 @@
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not clean up after itself at exit? */
@@ -143,7 +140,7 @@
{ \
const struct dentry *dentry = F_DENTRY(filp); \
const struct file_operations *real_fops = \
- REAL_FOPS_DEREF(dentry); \
+ debugfs_real_fops(filp); \
int srcu_idx; \
ret_type r; \
\
@@ -176,7 +173,7 @@
struct poll_table_struct *wait)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
int srcu_idx;
unsigned int r = 0;
@@ -193,7 +190,7 @@
static int full_proxy_release(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
const struct file_operations *proxy_fops = filp->f_op;
int r = 0;
@@ -209,7 +206,7 @@
replace_fops(filp, d_inode(dentry)->i_fop);
kfree((void *)proxy_fops);
fops_put(real_fops);
- return 0;
+ return r;
}
static void __full_proxy_fops_init(struct file_operations *proxy_fops,
@@ -241,7 +238,7 @@
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index bba5263..b3e8443 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -19,8 +19,4 @@
extern const struct file_operations debugfs_open_proxy_file_operations;
extern const struct file_operations debugfs_full_proxy_file_operations;
-struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode,
- struct dentry *parent, void *data,
- const struct file_operations *fops);
-
#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 79a5941..442d1a7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -272,13 +272,8 @@
struct dentry *root = sb->s_root;
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- kuid_t root_uid;
- kgid_t root_gid;
-
- root_uid = make_kuid(current_user_ns(), 0);
- root_gid = make_kgid(current_user_ns(), 0);
- if (!uid_valid(root_uid) || !gid_valid(root_gid))
- return -EINVAL;
+ kuid_t ptmx_uid = current_fsuid();
+ kgid_t ptmx_gid = current_fsgid();
inode_lock(d_inode(root));
@@ -309,8 +304,8 @@
mode = S_IFCHR|opts->ptmxmode;
init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
- inode->i_uid = root_uid;
- inode->i_gid = root_gid;
+ inode->i_uid = ptmx_uid;
+ inode->i_gid = ptmx_gid;
d_add(dentry, inode);
@@ -336,7 +331,6 @@
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- sync_filesystem(sb);
err = parse_mount_options(data, PARSE_REMOUNT, opts);
/*
@@ -395,6 +389,7 @@
devpts_fill_super(struct super_block *s, void *data, int silent)
{
struct inode *inode;
+ int error;
s->s_iflags &= ~SB_I_NODEV;
s->s_blocksize = 1024;
@@ -403,10 +398,16 @@
s->s_op = &devpts_sops;
s->s_time_gran = 1;
+ error = -ENOMEM;
s->s_fs_info = new_pts_fs_info(s);
if (!s->s_fs_info)
goto fail;
+ error = parse_mount_options(data, PARSE_MOUNT, &DEVPTS_SB(s)->mount_opts);
+ if (error)
+ goto fail;
+
+ error = -ENOMEM;
inode = new_inode(s);
if (!inode)
goto fail;
@@ -418,13 +419,21 @@
set_nlink(inode, 2);
s->s_root = d_make_root(inode);
- if (s->s_root)
- return 0;
+ if (!s->s_root) {
+ pr_err("get root dentry failed\n");
+ goto fail;
+ }
- pr_err("get root dentry failed\n");
+ error = mknod_ptmx(s);
+ if (error)
+ goto fail_dput;
+ return 0;
+fail_dput:
+ dput(s->s_root);
+ s->s_root = NULL;
fail:
- return -ENOMEM;
+ return error;
}
/*
@@ -436,43 +445,15 @@
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- int error;
- struct pts_mount_opts opts;
- struct super_block *s;
-
- error = parse_mount_options(data, PARSE_MOUNT, &opts);
- if (error)
- return ERR_PTR(error);
-
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- if (IS_ERR(s))
- return ERR_CAST(s);
-
- if (!s->s_root) {
- error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error)
- goto out_undo_sget;
- s->s_flags |= MS_ACTIVE;
- }
-
- memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
-
- error = mknod_ptmx(s);
- if (error)
- goto out_undo_sget;
-
- return dget(s->s_root);
-
-out_undo_sget:
- deactivate_locked_super(s);
- return ERR_PTR(error);
+ return mount_nodev(fs_type, flags, data, devpts_fill_super);
}
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
- ida_destroy(&fsi->allocated_ptys);
+ if (fsi)
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 1d73fc6..cbb50ca 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -105,7 +105,10 @@
inode->i_private = var;
- efivar_entry_add(var, &efivarfs_list);
+ err = efivar_entry_add(var, &efivarfs_list);
+ if (err)
+ goto out;
+
d_instantiate(dentry, inode);
dget(dentry);
out:
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 688ccc1..d7a7c53 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -157,12 +157,14 @@
goto fail_inode;
}
+ efivar_entry_size(entry, &size);
+ err = efivar_entry_add(entry, &efivarfs_list);
+ if (err)
+ goto fail_inode;
+
/* copied by the above to local storage in the dentry. */
kfree(name);
- efivar_entry_size(entry, &size);
- efivar_entry_add(entry, &efivarfs_list);
-
inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
@@ -182,7 +184,10 @@
static int efivarfs_destroy(struct efivar_entry *entry, void *data)
{
- efivar_entry_remove(entry);
+ int err = efivar_entry_remove(entry);
+
+ if (err)
+ return err;
kfree(entry);
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f..133fb25 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
-#include <linux/lglock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -158,12 +157,18 @@
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
- * keep a list on each CPU, with each list protected by its own spinlock via
- * the file_lock_lglock. Note that alterations to the list also require that
- * the relevant flc_lock is held.
+ * keep a list on each CPU, with each list protected by its own spinlock.
+ * Global serialization is done using file_rwsem.
+ *
+ * Note that alterations to the list also require that the relevant flc_lock is
+ * held.
*/
-DEFINE_STATIC_LGLOCK(file_lock_lglock);
-static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+struct file_lock_list_struct {
+ spinlock_t lock;
+ struct hlist_head hlist;
+};
+static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
+DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
/*
* The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,15 +592,23 @@
/* Must be called with the flc_lock held! */
static void locks_insert_global_locks(struct file_lock *fl)
{
- lg_local_lock(&file_lock_lglock);
+ struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
+ spin_lock(&fll->lock);
fl->fl_link_cpu = smp_processor_id();
- hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
- lg_local_unlock(&file_lock_lglock);
+ hlist_add_head(&fl->fl_link, &fll->hlist);
+ spin_unlock(&fll->lock);
}
/* Must be called with the flc_lock held! */
static void locks_delete_global_locks(struct file_lock *fl)
{
+ struct file_lock_list_struct *fll;
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
/*
* Avoid taking lock if already unhashed. This is safe since this check
* is done while holding the flc_lock, and new insertions into the list
@@ -603,9 +616,11 @@
*/
if (hlist_unhashed(&fl->fl_link))
return;
- lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+
+ fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
+ spin_lock(&fll->lock);
hlist_del_init(&fl->fl_link);
- lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+ spin_unlock(&fll->lock);
}
static unsigned long
@@ -915,6 +930,7 @@
return -ENOMEM;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
if (request->fl_flags & FL_ACCESS)
goto find_conflict;
@@ -955,6 +971,7 @@
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
if (new_fl)
locks_free_lock(new_fl);
locks_dispose_list(&dispose);
@@ -991,6 +1008,7 @@
new_fl2 = locks_alloc_lock();
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
* New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1180,7 @@
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
/*
* Free any unused locks.
*/
@@ -1436,6 +1455,7 @@
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
@@ -1487,9 +1507,13 @@
locks_insert_block(fl, new_fl);
trace_break_lease_block(inode, new_fl);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
+
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
!new_fl->fl_next, break_time);
+
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl);
locks_delete_block(new_fl);
@@ -1506,6 +1530,7 @@
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
locks_free_lock(new_fl);
return error;
@@ -1660,6 +1685,7 @@
return -EINVAL;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1756,7 @@
lease->fl_lmops->lm_setup(lease, priv);
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
if (is_deleg)
inode_unlock(inode);
@@ -1752,6 +1779,7 @@
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (fl->fl_file == filp &&
@@ -1764,6 +1792,7 @@
if (victim)
error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
return error;
}
@@ -2703,9 +2732,9 @@
struct locks_iterator *iter = f->private;
iter->li_pos = *pos + 1;
- lg_global_lock(&file_lock_lglock);
+ percpu_down_write(&file_rwsem);
spin_lock(&blocked_lock_lock);
- return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
+ return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
}
static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
@@ -2713,14 +2742,14 @@
struct locks_iterator *iter = f->private;
++iter->li_pos;
- return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
+ return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
}
static void locks_stop(struct seq_file *f, void *v)
__releases(&blocked_lock_lock)
{
spin_unlock(&blocked_lock_lock);
- lg_global_unlock(&file_lock_lglock);
+ percpu_up_write(&file_rwsem);
}
static const struct seq_operations locks_seq_operations = {
@@ -2761,10 +2790,13 @@
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
- lg_lock_init(&file_lock_lglock, "file_lock_lglock");
- for_each_possible_cpu(i)
- INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+ for_each_possible_cpu(i) {
+ struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
+
+ spin_lock_init(&fll->lock);
+ INIT_HLIST_HEAD(&fll->hlist);
+ }
return 0;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ec..e0e5f7c 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response ||
- atomic_read(&group->fanotify_data.bypass_perm));
-
- if (!event->response) { /* bypass_perm set */
- /*
- * Event was canceled because group is being destroyed. Remove
- * it from group's event list because we are responsible for
- * freeing the permission event.
- */
- fsnotify_remove_event(group, &event->fae.fse);
- return 0;
- }
+ wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bc..a643138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next;
+ struct fsnotify_event *fsn_event;
/*
- * There may be still new events arriving in the notification queue
- * but since userspace cannot use fanotify fd anymore, no event can
- * enter or leave access_list by now.
+ * Stop new events from arriving in the notification queue. since
+ * userspace cannot use fanotify fd anymore, no event can enter or
+ * leave access_list by now either.
+ */
+ fsnotify_group_stop_queueing(group);
+
+ /*
+ * Process all permission events on access_list and notification queue
+ * and simulate reply from userspace.
*/
spin_lock(&group->fanotify_data.access_lock);
-
- atomic_inc(&group->fanotify_data.bypass_perm);
-
list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
fae.fse.list) {
pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@
spin_unlock(&group->fanotify_data.access_lock);
/*
- * Since bypass_perm is set, newly queued events will not wait for
- * access response. Wake up the already sleeping ones now.
- * synchronize_srcu() in fsnotify_destroy_group() will wait for all
- * processes sleeping in fanotify_handle_event() waiting for access
- * response and thus also for all permission events to be freed.
+ * Destroy all non-permission events. For permission events just
+ * dequeue them and set the response. They will be freed once the
+ * response is consumed and fanotify_get_response() returns.
*/
+ mutex_lock(&group->notification_mutex);
+ while (!fsnotify_notify_queue_is_empty(group)) {
+ fsn_event = fsnotify_remove_first_event(group);
+ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+ fsnotify_destroy_event(group, fsn_event);
+ else
+ FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+ }
+ mutex_unlock(&group->notification_mutex);
+
+ /* Response for all permission events it set, wakeup waiters */
wake_up(&group->fanotify_data.access_waitq);
#endif
@@ -755,7 +768,6 @@
spin_lock_init(&group->fanotify_data.access_lock);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF:
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85..b47f7cf 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@
}
/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+ mutex_lock(&group->notification_mutex);
+ group->shutdown = true;
+ mutex_unlock(&group->notification_mutex);
+}
+
+/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
+ /*
+ * Stop queueing new events. The code below is careful enough to not
+ * require this but fanotify needs to stop queuing events even before
+ * fsnotify_destroy_group() is called and this makes the other callers
+ * of fsnotify_destroy_group() to see the same behavior.
+ */
+ fsnotify_group_stop_queueing(group);
+
/* clear all inode marks for this group, attach them to destroy_list */
fsnotify_detach_group_marks(group);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e0..e455e83 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
*/
int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
@@ -96,6 +97,11 @@
mutex_lock(&group->notification_mutex);
+ if (group->shutdown) {
+ mutex_unlock(&group->notification_mutex);
+ return 2;
+ }
+
if (group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@
}
/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
- struct fsnotify_event *event)
-{
- mutex_lock(&group->notification_mutex);
- if (!list_empty(&event->list)) {
- list_del_init(&event->list);
- group->q_len--;
- }
- mutex_unlock(&group->notification_mutex);
-}
-
-/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc3..f165f86 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@
}
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
- handle_t *handle,
struct inode *data_alloc_inode,
struct buffer_head *data_alloc_bh)
{
@@ -5935,11 +5934,19 @@
struct ocfs2_truncate_log *tl;
struct inode *tl_inode = osb->osb_tl_inode;
struct buffer_head *tl_bh = osb->osb_tl_bh;
+ handle_t *handle;
di = (struct ocfs2_dinode *) tl_bh->b_data;
tl = &di->id2.i_dealloc;
i = le16_to_cpu(tl->tl_used) - 1;
while (i >= 0) {
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail;
+ }
+
/* Caller has given us at least enough credits to
* update the truncate log dinode */
status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@
}
}
- status = ocfs2_extend_trans(handle,
- OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_commit_trans(osb, handle);
i--;
}
@@ -5994,7 +5996,6 @@
{
int status;
unsigned int num_to_flush;
- handle_t *handle;
struct inode *tl_inode = osb->osb_tl_inode;
struct inode *data_alloc_inode = NULL;
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_unlock;
- }
-
- status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+ status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
data_alloc_bh);
if (status < 0)
mlog_errno(status);
- ocfs2_commit_trans(osb, handle);
-
-out_unlock:
brelse(data_alloc_bh);
ocfs2_inode_unlock(data_alloc_inode, 1);
@@ -6413,43 +6404,34 @@
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out_unlock;
- }
-
while (head) {
if (head->free_bg)
bg_blkno = head->free_bg;
else
bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
head->free_bit);
+ handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
trace_ocfs2_free_cached_blocks(
(unsigned long long)head->free_blk, head->free_bit);
ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
head->free_bit, bg_blkno, 1);
- if (ret) {
+ if (ret)
mlog_errno(ret);
- goto out_journal;
- }
- ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
- if (ret) {
- mlog_errno(ret);
- goto out_journal;
- }
+ ocfs2_commit_trans(osb, handle);
tmp = head;
head = head->free_next;
kfree(tmp);
}
-out_journal:
- ocfs2_commit_trans(osb, handle);
-
out_unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 98d3654..bbb4b3e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1842,6 +1842,16 @@
ocfs2_commit_trans(osb, handle);
out:
+ /*
+ * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(),
+ * even in case of error here like ENOSPC and ENOMEM. So, we need
+ * to unlock the target page manually to prevent deadlocks when
+ * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED
+ * to VM code.
+ */
+ if (wc->w_target_locked)
+ unlock_page(mmap_page);
+
ocfs2_free_write_ctxt(inode, wc);
if (data_ac) {
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b1836..b95e7df 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
* version here in tcp_internal.h should not need to be bumped for
* filesystem locking changes.
*
- * New in version 12
- * - Negotiate hb timeout when storage is down.
- *
* New in version 11
* - Negotiation of filesystem locking in the dlm join.
*
@@ -78,7 +75,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4..0bb1286 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@
struct dlm_lock *lock, int flags, int type)
{
enum dlm_status status;
- u8 old_owner = res->owner;
mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- lock->convert_pending = 0;
/* if it failed, move it back to granted queue.
* if master returns DLM_NORMAL and then down before sending ast,
* it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@
if (status != DLM_NOTQUEUED)
dlm_error(status);
dlm_revert_pending_convert(res, lock);
- } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
- (old_owner != res->owner)) {
- mlog(0, "res %.*s is in recovering or has been recovered.\n",
- res->lockname.len, res->lockname.name);
+ } else if (!lock->convert_pending) {
+ mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+ "to granted list, retry convert.\n",
+ dlm->name, res->lockname.len, res->lockname.name);
status = DLM_RECOVERING;
}
+
+ lock->convert_pending = 0;
bail:
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc..0b055bf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@
u64 start, u64 len)
{
int ret = 0;
- u64 tmpend, end = start + len;
+ u64 tmpend = 0;
+ u64 end = start + len;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
@@ -1538,18 +1539,31 @@
}
/*
- * We want to get the byte offset of the end of the 1st cluster.
+ * If start is on a cluster boundary and end is somewhere in another
+ * cluster, we have not COWed the cluster starting at start, unless
+ * end is also within the same cluster. So, in this case, we skip this
+ * first call to ocfs2_zero_range_for_truncate() truncate and move on
+ * to the next one.
*/
- tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
- if (tmpend > end)
- tmpend = end;
+ if ((start & (csize - 1)) != 0) {
+ /*
+ * We want to get the byte offset of the end of the 1st
+ * cluster.
+ */
+ tmpend = (u64)osb->s_clustersize +
+ (start & ~(osb->s_clustersize - 1));
+ if (tmpend > end)
+ tmpend = end;
- trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
- (unsigned long long)tmpend);
+ trace_ocfs2_zero_partial_clusters_range1(
+ (unsigned long long)start,
+ (unsigned long long)tmpend);
- ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
- if (ret)
- mlog_errno(ret);
+ ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+ tmpend);
+ if (ret)
+ mlog_errno(ret);
+ }
if (tmpend < end) {
/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120..6ad3533 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@
inode_unlock((*ac)->ac_inode);
ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
- if (ret == 1)
+ if (ret == 1) {
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
goto retry;
+ }
if (ret < 0)
mlog_errno(ret);
inode_lock((*ac)->ac_inode);
- ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ inode_unlock((*ac)->ac_inode);
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
+ goto bail;
+ }
}
if (status < 0) {
if (status != -ENOSPC)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ac0df4d..3b792ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -483,7 +483,7 @@
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
- seq_printf(m, "[<%pK>] %pS\n",
+ seq_printf(m, "[<%pK>] %pB\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a939f5e..5c89a07 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;
- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212..12af049 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/ramfs.h>
+#include <linux/sched.h>
#include "internal.h"
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
const struct file_operations ramfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
@@ -38,6 +46,7 @@
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
+ .get_unmapped_area = ramfs_mmu_get_unmapped_area,
};
const struct inode_operations ramfs_file_inode_operations = {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index dc1358b..ac2de0e 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -233,8 +233,8 @@
kn = kernfs_find_and_get(parent, grp->name);
if (!kn) {
WARN(!kn, KERN_WARNING
- "sysfs group %p not found for kobject '%s'\n",
- grp, kobject_name(kobj));
+ "sysfs group '%s' not found for kobject '%s'\n",
+ grp->name, kobject_name(kobj));
return;
}
} else {
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index fe2e3ac..12c2882 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -144,6 +144,10 @@
#define ACPI_ADDRESS_RANGE_MAX 2
+/* Maximum number of While() loops before abort */
+
+#define ACPI_MAX_LOOP_COUNT 0xFFFF
+
/******************************************************************************
*
* ACPI Specification constants (Do not change unless the specification changes)
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 34f601e..48eb4dd 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -366,7 +366,7 @@
ACPI_TRACE_ENTRY (name, acpi_ut_trace_u32, u32, value)
#define ACPI_FUNCTION_TRACE_STR(name, string) \
- ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, char *, string)
+ ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, const char *, string)
#define ACPI_FUNCTION_ENTRY() \
acpi_ut_track_stack_ptr()
@@ -425,6 +425,9 @@
#define return_PTR(pointer) \
ACPI_TRACE_EXIT (acpi_ut_ptr_exit, void *, pointer)
+#define return_STR(string) \
+ ACPI_TRACE_EXIT (acpi_ut_str_exit, const char *, string)
+
#define return_VALUE(value) \
ACPI_TRACE_EXIT (acpi_ut_value_exit, u64, value)
@@ -478,6 +481,7 @@
#define return_VOID return
#define return_ACPI_STATUS(s) return(s)
#define return_PTR(s) return(s)
+#define return_STR(s) return(s)
#define return_VALUE(s) return(s)
#define return_UINT8(s) return(s)
#define return_UINT32(s) return(s)
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 562603d..f3414c8 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -371,6 +371,12 @@
acpi_status acpi_os_notify_command_complete(void);
#endif
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_trace_point
+void
+acpi_os_trace_point(acpi_trace_event_type type,
+ u8 begin, u8 *aml, char *pathname);
+#endif
+
/*
* Obtain ACPI table(s)
*/
@@ -416,41 +422,4 @@
void acpi_os_close_directory(void *dir_handle);
#endif
-/*
- * File I/O and related support
- */
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_open_file
-ACPI_FILE acpi_os_open_file(const char *path, u8 modes);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_close_file
-void acpi_os_close_file(ACPI_FILE file);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_read_file
-int
-acpi_os_read_file(ACPI_FILE file,
- void *buffer, acpi_size size, acpi_size count);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_write_file
-int
-acpi_os_write_file(ACPI_FILE file,
- void *buffer, acpi_size size, acpi_size count);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_file_offset
-long acpi_os_get_file_offset(ACPI_FILE file);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_set_file_offset
-acpi_status acpi_os_set_file_offset(ACPI_FILE file, long offset, u8 from);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_trace_point
-void
-acpi_os_trace_point(acpi_trace_event_type type,
- u8 begin, u8 *aml, char *pathname);
-#endif
-
#endif /* __ACPIOSXF_H__ */
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1ff3a76..c7b3a13 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20160422
+#define ACPI_CA_VERSION 0x20160831
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
@@ -195,6 +195,13 @@
ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, TRUE);
/*
+ * Optionally support module level code by parsing the entire table as
+ * a term_list. Default is FALSE, do not execute entire table until some
+ * lock order issues are fixed.
+ */
+ACPI_INIT_GLOBAL(u8, acpi_gbl_parse_table_as_term_list, FALSE);
+
+/*
* Optionally use 32-bit FADT addresses if and when there is a conflict
* (address mismatch) between the 32-bit and 64-bit versions of the
* address. Although ACPICA adheres to the ACPI specification which
@@ -416,18 +423,19 @@
/*
* Initialization
*/
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
acpi_initialize_tables(struct acpi_table_desc
*initial_storage,
u32 initial_table_count,
u8 allow_resize))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_initialize_subsystem(void))
-
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_enable_subsystem(u32 flags))
-
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
- acpi_initialize_objects(u32 flags))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_terminate(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_initialize_subsystem(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_enable_subsystem(u32 flags))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_initialize_objects(u32 flags))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_terminate(void))
/*
* Miscellaneous global interfaces
@@ -467,7 +475,7 @@
/*
* ACPI table load/unload interfaces
*/
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
acpi_install_table(acpi_physical_address address,
u8 physical))
@@ -476,14 +484,17 @@
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_unload_parent_table(acpi_handle object))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_load_tables(void))
+
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_load_tables(void))
/*
* ACPI table manipulation interfaces
*/
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_reallocate_root_table(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+ acpi_reallocate_root_table(void))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
acpi_find_root_pointer(acpi_physical_address
*rsdp_address))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
@@ -732,6 +743,10 @@
u32 gpe_number))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+ acpi_mask_gpe(acpi_handle gpe_device,
+ u32 gpe_number, u8 is_masked))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_mark_gpe_for_wake(acpi_handle gpe_device,
u32 gpe_number))
@@ -935,9 +950,6 @@
acpi_trace_point(acpi_trace_event_type type,
u8 begin,
u8 *aml, char *pathname))
-ACPI_APP_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(1)
- void ACPI_INTERNAL_VAR_XFACE
- acpi_log_error(const char *format, ...))
acpi_status acpi_initialize_debugger(void);
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index c19700e..1b949e0 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -230,62 +230,72 @@
/* Fields common to all versions of the FADT */
struct acpi_table_fadt {
- struct acpi_table_header header; /* Common ACPI table header */
- u32 facs; /* 32-bit physical address of FACS */
- u32 dsdt; /* 32-bit physical address of DSDT */
- u8 model; /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
- u8 preferred_profile; /* Conveys preferred power management profile to OSPM. */
- u16 sci_interrupt; /* System vector of SCI interrupt */
- u32 smi_command; /* 32-bit Port address of SMI command port */
- u8 acpi_enable; /* Value to write to SMI_CMD to enable ACPI */
- u8 acpi_disable; /* Value to write to SMI_CMD to disable ACPI */
- u8 s4_bios_request; /* Value to write to SMI_CMD to enter S4BIOS state */
- u8 pstate_control; /* Processor performance state control */
- u32 pm1a_event_block; /* 32-bit port address of Power Mgt 1a Event Reg Blk */
- u32 pm1b_event_block; /* 32-bit port address of Power Mgt 1b Event Reg Blk */
- u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */
- u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */
- u32 pm2_control_block; /* 32-bit port address of Power Mgt 2 Control Reg Blk */
- u32 pm_timer_block; /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
- u32 gpe0_block; /* 32-bit port address of General Purpose Event 0 Reg Blk */
- u32 gpe1_block; /* 32-bit port address of General Purpose Event 1 Reg Blk */
- u8 pm1_event_length; /* Byte Length of ports at pm1x_event_block */
- u8 pm1_control_length; /* Byte Length of ports at pm1x_control_block */
- u8 pm2_control_length; /* Byte Length of ports at pm2_control_block */
- u8 pm_timer_length; /* Byte Length of ports at pm_timer_block */
- u8 gpe0_block_length; /* Byte Length of ports at gpe0_block */
- u8 gpe1_block_length; /* Byte Length of ports at gpe1_block */
- u8 gpe1_base; /* Offset in GPE number space where GPE1 events start */
- u8 cst_control; /* Support for the _CST object and C-States change notification */
- u16 c2_latency; /* Worst case HW latency to enter/exit C2 state */
- u16 c3_latency; /* Worst case HW latency to enter/exit C3 state */
- u16 flush_size; /* Processor memory cache line width, in bytes */
- u16 flush_stride; /* Number of flush strides that need to be read */
- u8 duty_offset; /* Processor duty cycle index in processor P_CNT reg */
- u8 duty_width; /* Processor duty cycle value bit width in P_CNT register */
- u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */
- u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */
- u8 century; /* Index to century in RTC CMOS RAM */
- u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */
- u8 reserved; /* Reserved, must be zero */
- u32 flags; /* Miscellaneous flag bits (see below for individual flags) */
- struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */
- u8 reset_value; /* Value to write to the reset_register port to reset the system */
- u16 arm_boot_flags; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
- u8 minor_revision; /* FADT Minor Revision (ACPI 5.1) */
- u64 Xfacs; /* 64-bit physical address of FACS */
- u64 Xdsdt; /* 64-bit physical address of DSDT */
- struct acpi_generic_address xpm1a_event_block; /* 64-bit Extended Power Mgt 1a Event Reg Blk address */
- struct acpi_generic_address xpm1b_event_block; /* 64-bit Extended Power Mgt 1b Event Reg Blk address */
- struct acpi_generic_address xpm1a_control_block; /* 64-bit Extended Power Mgt 1a Control Reg Blk address */
- struct acpi_generic_address xpm1b_control_block; /* 64-bit Extended Power Mgt 1b Control Reg Blk address */
- struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */
- struct acpi_generic_address xpm_timer_block; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
- struct acpi_generic_address xgpe0_block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */
- struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */
- struct acpi_generic_address sleep_control; /* 64-bit Sleep Control register (ACPI 5.0) */
- struct acpi_generic_address sleep_status; /* 64-bit Sleep Status register (ACPI 5.0) */
- u64 hypervisor_id; /* Hypervisor Vendor ID (ACPI 6.0) */
+ struct acpi_table_header header; /* [V1] Common ACPI table header */
+ u32 facs; /* [V1] 32-bit physical address of FACS */
+ u32 dsdt; /* [V1] 32-bit physical address of DSDT */
+ u8 model; /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
+ u8 preferred_profile; /* [V1] Conveys preferred power management profile to OSPM. */
+ u16 sci_interrupt; /* [V1] System vector of SCI interrupt */
+ u32 smi_command; /* [V1] 32-bit Port address of SMI command port */
+ u8 acpi_enable; /* [V1] Value to write to SMI_CMD to enable ACPI */
+ u8 acpi_disable; /* [V1] Value to write to SMI_CMD to disable ACPI */
+ u8 s4_bios_request; /* [V1] Value to write to SMI_CMD to enter S4BIOS state */
+ u8 pstate_control; /* [V1] Processor performance state control */
+ u32 pm1a_event_block; /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */
+ u32 pm1b_event_block; /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */
+ u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */
+ u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */
+ u32 pm2_control_block; /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */
+ u32 pm_timer_block; /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
+ u32 gpe0_block; /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */
+ u32 gpe1_block; /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */
+ u8 pm1_event_length; /* [V1] Byte Length of ports at pm1x_event_block */
+ u8 pm1_control_length; /* [V1] Byte Length of ports at pm1x_control_block */
+ u8 pm2_control_length; /* [V1] Byte Length of ports at pm2_control_block */
+ u8 pm_timer_length; /* [V1] Byte Length of ports at pm_timer_block */
+ u8 gpe0_block_length; /* [V1] Byte Length of ports at gpe0_block */
+ u8 gpe1_block_length; /* [V1] Byte Length of ports at gpe1_block */
+ u8 gpe1_base; /* [V1] Offset in GPE number space where GPE1 events start */
+ u8 cst_control; /* [V1] Support for the _CST object and C-States change notification */
+ u16 c2_latency; /* [V1] Worst case HW latency to enter/exit C2 state */
+ u16 c3_latency; /* [V1] Worst case HW latency to enter/exit C3 state */
+ u16 flush_size; /* [V1] Processor memory cache line width, in bytes */
+ u16 flush_stride; /* [V1] Number of flush strides that need to be read */
+ u8 duty_offset; /* [V1] Processor duty cycle index in processor P_CNT reg */
+ u8 duty_width; /* [V1] Processor duty cycle value bit width in P_CNT register */
+ u8 day_alarm; /* [V1] Index to day-of-month alarm in RTC CMOS RAM */
+ u8 month_alarm; /* [V1] Index to month-of-year alarm in RTC CMOS RAM */
+ u8 century; /* [V1] Index to century in RTC CMOS RAM */
+ u16 boot_flags; /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */
+ u8 reserved; /* [V1] Reserved, must be zero */
+ u32 flags; /* [V1] Miscellaneous flag bits (see below for individual flags) */
+ /* End of Version 1 FADT fields (ACPI 1.0) */
+
+ struct acpi_generic_address reset_register; /* [V3] 64-bit address of the Reset register */
+ u8 reset_value; /* [V3] Value to write to the reset_register port to reset the system */
+ u16 arm_boot_flags; /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
+ u8 minor_revision; /* [V5] FADT Minor Revision (ACPI 5.1) */
+ u64 Xfacs; /* [V3] 64-bit physical address of FACS */
+ u64 Xdsdt; /* [V3] 64-bit physical address of DSDT */
+ struct acpi_generic_address xpm1a_event_block; /* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */
+ struct acpi_generic_address xpm1b_event_block; /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */
+ struct acpi_generic_address xpm1a_control_block; /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */
+ struct acpi_generic_address xpm1b_control_block; /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */
+ struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */
+ struct acpi_generic_address xpm_timer_block; /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
+ struct acpi_generic_address xgpe0_block; /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */
+ struct acpi_generic_address xgpe1_block; /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */
+ /* End of Version 3 FADT fields (ACPI 2.0) */
+
+ struct acpi_generic_address sleep_control; /* [V4] 64-bit Sleep Control register (ACPI 5.0) */
+ /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */
+
+ struct acpi_generic_address sleep_status; /* [V5] 64-bit Sleep Status register (ACPI 5.0) */
+ /* End of Version 5 FADT fields (ACPI 5.0) */
+
+ u64 hypervisor_id; /* [V6] Hypervisor Vendor ID (ACPI 6.0) */
+ /* End of Version 6 FADT fields (ACPI 6.0) */
+
};
/* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */
@@ -301,8 +311,8 @@
/* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */
-#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5+] PSCI 0.2+ is implemented */
-#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */
+#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5] PSCI 0.2+ is implemented */
+#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */
/* Masks for FADT flags */
@@ -399,20 +409,34 @@
* match the expected length. In other words, the length of the
* FADT is the bottom line as to what the version really is.
*
- * For reference, the values below are as follows:
- * FADT V1 size: 0x074
- * FADT V2 size: 0x084
- * FADT V3 size: 0x0F4
- * FADT V4 size: 0x0F4
- * FADT V5 size: 0x10C
- * FADT V6 size: 0x114
+ * NOTE: There is no officialy released V2 of the FADT. This
+ * version was used only for prototyping and testing during the
+ * 32-bit to 64-bit transition. V3 was the first official 64-bit
+ * version of the FADT.
+ *
+ * Update this list of defines when a new version of the FADT is
+ * added to the ACPI specification. Note that the FADT version is
+ * only incremented when new fields are appended to the existing
+ * version. Therefore, the FADT version is competely independent
+ * from the version of the ACPI specification where it is
+ * defined.
+ *
+ * For reference, the various FADT lengths are as follows:
+ * FADT V1 size: 0x074 ACPI 1.0
+ * FADT V3 size: 0x0F4 ACPI 2.0
+ * FADT V4 size: 0x100 ACPI 3.0 and ACPI 4.0
+ * FADT V5 size: 0x10C ACPI 5.0
+ * FADT V6 size: 0x114 ACPI 6.0
*/
-#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4)
-#define ACPI_FADT_V2_SIZE (u32) (ACPI_FADT_OFFSET (minor_revision) + 1)
-#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control))
-#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id))
-#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt))
+#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) /* ACPI 1.0 */
+#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) /* ACPI 2.0 */
+#define ACPI_FADT_V4_SIZE (u32) (ACPI_FADT_OFFSET (sleep_status)) /* ACPI 3.0 and ACPI 4.0 */
+#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) /* ACPI 5.0 */
+#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) /* ACPI 6.0 */
+/* Update these when new FADT versions are added */
+
+#define ACPI_FADT_MAX_VERSION 6
#define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)"
#endif /* __ACTBL_H__ */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index cb389ef..1d798ab 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -732,16 +732,17 @@
* The encoding of acpi_event_status is illustrated below.
* Note that a set bit (1) indicates the property is TRUE
* (e.g. if bit 0 is set then the event is enabled).
- * +-------------+-+-+-+-+-+
- * | Bits 31:5 |4|3|2|1|0|
- * +-------------+-+-+-+-+-+
- * | | | | | |
- * | | | | | +- Enabled?
- * | | | | +--- Enabled for wake?
- * | | | +----- Status bit set?
- * | | +------- Enable bit set?
- * | +--------- Has a handler?
- * +--------------- <Reserved>
+ * +-------------+-+-+-+-+-+-+
+ * | Bits 31:6 |5|4|3|2|1|0|
+ * +-------------+-+-+-+-+-+-+
+ * | | | | | | |
+ * | | | | | | +- Enabled?
+ * | | | | | +--- Enabled for wake?
+ * | | | | +----- Status bit set?
+ * | | | +------- Enable bit set?
+ * | | +--------- Has a handler?
+ * | +----------- Masked?
+ * +----------------- <Reserved>
*/
typedef u32 acpi_event_status;
@@ -751,6 +752,7 @@
#define ACPI_EVENT_FLAG_STATUS_SET (acpi_event_status) 0x04
#define ACPI_EVENT_FLAG_ENABLE_SET (acpi_event_status) 0x08
#define ACPI_EVENT_FLAG_HAS_HANDLER (acpi_event_status) 0x10
+#define ACPI_EVENT_FLAG_MASKED (acpi_event_status) 0x20
#define ACPI_EVENT_FLAG_SET ACPI_EVENT_FLAG_STATUS_SET
/* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
@@ -761,14 +763,15 @@
/*
* GPE info flags - Per GPE
- * +-------+-+-+---+
- * | 7:5 |4|3|2:0|
- * +-------+-+-+---+
- * | | | |
- * | | | +-- Type of dispatch:to method, handler, notify, or none
- * | | +----- Interrupt type: edge or level triggered
- * | +------- Is a Wake GPE
- * +------------ <Reserved>
+ * +---+-+-+-+---+
+ * |7:6|5|4|3|2:0|
+ * +---+-+-+-+---+
+ * | | | | |
+ * | | | | +-- Type of dispatch:to method, handler, notify, or none
+ * | | | +----- Interrupt type: edge or level triggered
+ * | | +------- Is a Wake GPE
+ * | +--------- Is GPE masked by the software GPE masking machanism
+ * +------------ <Reserved>
*/
#define ACPI_GPE_DISPATCH_NONE (u8) 0x00
#define ACPI_GPE_DISPATCH_METHOD (u8) 0x01
@@ -1031,12 +1034,6 @@
u32 method_count;
};
-/* Table Event Types */
-
-#define ACPI_TABLE_EVENT_LOAD 0x0
-#define ACPI_TABLE_EVENT_UNLOAD 0x1
-#define ACPI_NUM_TABLE_EVENTS 2
-
/*
* Types specific to the OS service interfaces
*/
@@ -1088,9 +1085,13 @@
typedef
acpi_status (*acpi_table_handler) (u32 event, void *table, void *context);
-#define ACPI_TABLE_LOAD 0x0
-#define ACPI_TABLE_UNLOAD 0x1
-#define ACPI_NUM_TABLE_EVENTS 2
+/* Table Event Types */
+
+#define ACPI_TABLE_EVENT_LOAD 0x0
+#define ACPI_TABLE_EVENT_UNLOAD 0x1
+#define ACPI_TABLE_EVENT_INSTALL 0x2
+#define ACPI_TABLE_EVENT_UNINSTALL 0x3
+#define ACPI_NUM_TABLE_EVENTS 4
/* Address Spaces (For Operation Regions) */
@@ -1285,15 +1286,6 @@
#define ACPI_OSI_WIN_8 0x0C
#define ACPI_OSI_WIN_10 0x0D
-/* Definitions of file IO */
-
-#define ACPI_FILE_READING 0x01
-#define ACPI_FILE_WRITING 0x02
-#define ACPI_FILE_BINARY 0x04
-
-#define ACPI_FILE_BEGIN 0x01
-#define ACPI_FILE_END 0x02
-
/* Definitions of getopt */
#define ACPI_OPT_END -1
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 284965c..427a7c3 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -24,7 +24,9 @@
#define CPPC_NUM_ENT 21
#define CPPC_REV 2
-#define PCC_CMD_COMPLETE 1
+#define PCC_CMD_COMPLETE_MASK (1 << 0)
+#define PCC_ERROR_MASK (1 << 2)
+
#define MAX_CPC_REG_ENT 19
/* CPPC specific PCC commands. */
@@ -49,6 +51,7 @@
*/
struct cpc_register_resource {
acpi_object_type type;
+ u64 __iomem *sys_mem_vaddr;
union {
struct cpc_reg reg;
u64 int_value;
@@ -60,8 +63,11 @@
int num_entries;
int version;
int cpu_id;
+ int write_cmd_status;
+ int write_cmd_id;
struct cpc_register_resource cpc_regs[MAX_CPC_REG_ENT];
struct acpi_psd_package domain_info;
+ struct kobject kobj;
};
/* These are indexes into the per-cpu cpc_regs[]. Order is important. */
@@ -96,7 +102,6 @@
struct cppc_perf_caps {
u32 highest_perf;
u32 nominal_perf;
- u32 reference_perf;
u32 lowest_perf;
};
@@ -108,13 +113,13 @@
struct cppc_perf_fb_ctrs {
u64 reference;
- u64 prev_reference;
u64 delivered;
- u64 prev_delivered;
+ u64 reference_perf;
+ u64 ctr_wrap_time;
};
/* Per CPU container for runtime CPPC management. */
-struct cpudata {
+struct cppc_cpudata {
int cpu;
struct cppc_perf_caps perf_caps;
struct cppc_perf_ctrls perf_ctrls;
@@ -127,6 +132,7 @@
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
-extern int acpi_get_psd_map(struct cpudata **);
+extern int acpi_get_psd_map(struct cppc_cpudata **);
+extern unsigned int cppc_get_transition_latency(int cpu);
#endif /* _CPPC_ACPI_H*/
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 86b5a84..34cce72 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -78,6 +78,7 @@
(defined ACPI_EXAMPLE_APP)
#define ACPI_APPLICATION
#define ACPI_SINGLE_THREADED
+#define USE_NATIVE_ALLOCATE_ZEROED
#endif
/* iASL configuration */
@@ -124,7 +125,6 @@
#ifdef ACPI_DUMP_APP
#define ACPI_USE_NATIVE_MEMORY_MAPPING
-#define USE_NATIVE_ALLOCATE_ZEROED
#endif
/* acpi_names/Example configuration. Hardware disabled */
@@ -149,7 +149,6 @@
/* Common for all ACPICA applications */
#ifdef ACPI_APPLICATION
-#define ACPI_USE_SYSTEM_CLIBRARY
#define ACPI_USE_LOCAL_CACHE
#endif
@@ -167,10 +166,21 @@
/******************************************************************************
*
* Host configuration files. The compiler configuration files are included
- * by the host files.
+ * first.
*
*****************************************************************************/
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#include <acpi/platform/acgcc.h>
+
+#elif defined(_MSC_VER)
+#include "acmsvc.h"
+
+#elif defined(__INTEL_COMPILER)
+#include "acintel.h"
+
+#endif
+
#if defined(_LINUX) || defined(__linux__)
#include <acpi/platform/aclinux.h>
@@ -210,18 +220,20 @@
#elif defined(__OS2__)
#include "acos2.h"
-#elif defined(_AED_EFI)
-#include "acefi.h"
-
-#elif defined(_GNU_EFI)
-#include "acefi.h"
-
#elif defined(__HAIKU__)
#include "achaiku.h"
#elif defined(__QNX__)
#include "acqnx.h"
+/*
+ * EFI applications can be built with -nostdlib, in this case, it must be
+ * included after including all other host environmental definitions, in
+ * order to override the definitions.
+ */
+#elif defined(_AED_EFI) || defined(_GNU_EFI) || defined(_EDK2_EFI)
+#include "acefi.h"
+
#else
/* Unknown environment */
@@ -326,7 +338,8 @@
* ACPI_USE_SYSTEM_CLIBRARY - Define this if linking to an actual C library.
* Otherwise, local versions of string/memory functions will be used.
* ACPI_USE_STANDARD_HEADERS - Define this if linking to a C library and
- * the standard header files may be used.
+ * the standard header files may be used. Defining this implies that
+ * ACPI_USE_SYSTEM_CLIBRARY has been defined.
*
* The ACPICA subsystem only uses low level C library functions that do not
* call operating system services and may therefore be inlined in the code.
@@ -334,7 +347,6 @@
* It may be necessary to tailor these include files to the target
* generation environment.
*/
-#ifdef ACPI_USE_SYSTEM_CLIBRARY
/* Use the standard C library headers. We want to keep these to a minimum. */
@@ -342,57 +354,20 @@
/* Use the standard headers from the standard locations */
-#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#ifdef ACPI_APPLICATION
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#endif
#endif /* ACPI_USE_STANDARD_HEADERS */
-/* We will be linking to the standard Clib functions */
-
-#else
-
-/******************************************************************************
- *
- * Not using native C library, use local implementations
- *
- *****************************************************************************/
-
-/*
- * Use local definitions of C library macros and functions. These function
- * implementations may not be as efficient as an inline or assembly code
- * implementation provided by a native C library, but they are functionally
- * equivalent.
- */
-#ifndef va_arg
-
-#ifndef _VALIST
-#define _VALIST
-typedef char *va_list;
-#endif /* _VALIST */
-
-/* Storage alignment properties */
-
-#define _AUPBND (sizeof (acpi_native_int) - 1)
-#define _ADNBND (sizeof (acpi_native_int) - 1)
-
-/* Variable argument list macro definitions */
-
-#define _bnd(X, bnd) (((sizeof (X)) + (bnd)) & (~(bnd)))
-#define va_arg(ap, T) (*(T *)(((ap) += (_bnd (T, _AUPBND))) - (_bnd (T,_ADNBND))))
-#define va_end(ap) (ap = (va_list) NULL)
-#define va_start(ap, A) (void) ((ap) = (((char *) &(A)) + (_bnd (A,_AUPBND))))
-
-#endif /* va_arg */
-
-/* Use the local (ACPICA) definitions of the clib functions */
-
-#endif /* ACPI_USE_SYSTEM_CLIBRARY */
-
-#ifndef ACPI_FILE
#ifdef ACPI_APPLICATION
-#include <stdio.h>
#define ACPI_FILE FILE *
#define ACPI_FILE_OUT stdout
#define ACPI_FILE_ERR stderr
@@ -401,6 +376,9 @@
#define ACPI_FILE_OUT NULL
#define ACPI_FILE_ERR NULL
#endif /* ACPI_APPLICATION */
-#endif /* ACPI_FILE */
+
+#ifndef ACPI_INIT_FUNCTION
+#define ACPI_INIT_FUNCTION
+#endif
#endif /* __ACENV_H__ */
diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h
index 4f15c1d..b3171b9 100644
--- a/include/acpi/platform/acenvex.h
+++ b/include/acpi/platform/acenvex.h
@@ -56,18 +56,25 @@
#if defined(_LINUX) || defined(__linux__)
#include <acpi/platform/aclinuxex.h>
-#elif defined(WIN32)
-#include "acwinex.h"
-
-#elif defined(_AED_EFI)
-#include "acefiex.h"
-
-#elif defined(_GNU_EFI)
-#include "acefiex.h"
-
#elif defined(__DragonFly__)
#include "acdragonflyex.h"
+/*
+ * EFI applications can be built with -nostdlib, in this case, it must be
+ * included after including all other host environmental definitions, in
+ * order to override the definitions.
+ */
+#elif defined(_AED_EFI) || defined(_GNU_EFI) || defined(_EDK2_EFI)
+#include "acefiex.h"
+
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#include "acgccex.h"
+
+#elif defined(_MSC_VER)
+#include "acmsvcex.h"
+
#endif
/*! [End] no source code translation !*/
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index c5a216c9..8f66aaa 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -44,6 +44,12 @@
#ifndef __ACGCC_H__
#define __ACGCC_H__
+/*
+ * Use compiler specific <stdarg.h> is a good practice for even when
+ * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined.
+ */
+#include <stdarg.h>
+
#define ACPI_INLINE __inline__
/* Function name is used for debug output. Non-ANSI, compiler-dependent */
@@ -64,17 +70,6 @@
*/
#define ACPI_UNUSED_VAR __attribute__ ((unused))
-/*
- * Some versions of gcc implement strchr() with a buggy macro. So,
- * undef it here. Prevents error messages of this form (usually from the
- * file getopt.c):
- *
- * error: logical '&&' with non-zero constant will always evaluate as true
- */
-#ifdef strchr
-#undef strchr
-#endif
-
/* GCC supports __VA_ARGS__ in macros */
#define COMPILER_VA_MACRO 1
diff --git a/include/acpi/platform/acgccex.h b/include/acpi/platform/acgccex.h
new file mode 100644
index 0000000..46ead2c
--- /dev/null
+++ b/include/acpi/platform/acgccex.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ *
+ * Name: acgccex.h - Extra GCC specific defines, etc.
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef __ACGCCEX_H__
+#define __ACGCCEX_H__
+
+/*
+ * Some versions of gcc implement strchr() with a buggy macro. So,
+ * undef it here. Prevents error messages of this form (usually from the
+ * file getopt.c):
+ *
+ * error: logical '&&' with non-zero constant will always evaluate as true
+ */
+#ifdef strchr
+#undef strchr
+#endif
+
+#endif /* __ACGCCEX_H__ */
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 93b61b1..a5d98d1 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -92,6 +92,8 @@
#include <asm/acenv.h>
#endif
+#define ACPI_INIT_FUNCTION __init
+
#ifndef CONFIG_ACPI
/* External globals for __KERNEL__, stubs is needed */
@@ -168,13 +170,21 @@
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_next_filename
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_close_directory
+#define ACPI_MSG_ERROR KERN_ERR "ACPI Error: "
+#define ACPI_MSG_EXCEPTION KERN_ERR "ACPI Exception: "
+#define ACPI_MSG_WARNING KERN_WARNING "ACPI Warning: "
+#define ACPI_MSG_INFO KERN_INFO "ACPI: "
+
+#define ACPI_MSG_BIOS_ERROR KERN_ERR "ACPI BIOS Error (bug): "
+#define ACPI_MSG_BIOS_WARNING KERN_WARNING "ACPI BIOS Warning (bug): "
+
#else /* !__KERNEL__ */
-#include <stdarg.h>
-#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
+#define ACPI_USE_STANDARD_HEADERS
+
+#ifdef ACPI_USE_STANDARD_HEADERS
#include <unistd.h>
+#endif
/* Define/disable kernel-specific declarators */
@@ -205,8 +215,4 @@
#endif /* __KERNEL__ */
-/* Linux uses GCC */
-
-#include <acpi/platform/acgcc.h>
-
#endif /* __ACLINUX_H__ */
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index f8bb0d8..a5509d8 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -71,7 +71,7 @@
/*
* Overrides for in-kernel ACPICA
*/
-acpi_status __init acpi_os_initialize(void);
+acpi_status ACPI_INIT_FUNCTION acpi_os_initialize(void);
acpi_status acpi_os_terminate(void);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index bfe6b2e..f3db11c 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -359,7 +359,7 @@
* onlined/offlined. In such case the flags.throttling will be updated.
*/
extern void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
- unsigned long action);
+ bool is_dead);
extern const struct file_operations acpi_processor_throttling_fops;
extern void acpi_processor_throttling_init(void);
#else
@@ -380,7 +380,7 @@
}
static inline void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
- unsigned long action) {}
+ bool is_dead) {}
static inline void acpi_processor_throttling_init(void) {}
#endif /* CONFIG_ACPI_CPU_FREQ_PSS */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c5eaf2f..94afcb2c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -85,6 +85,8 @@
return dev_name(&adev->dev);
}
+struct device *acpi_get_first_physical_node(struct acpi_device *adev);
+
enum acpi_irq_model_id {
ACPI_IRQ_MODEL_PIC = 0,
ACPI_IRQ_MODEL_IOAPIC,
@@ -267,12 +269,18 @@
return phys_id == PHYS_CPUID_INVALID;
}
+/* Validate the processor object's proc_id */
+bool acpi_processor_validate_proc_id(int proc_id);
+
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/* Arch dependent functions for cpu hotplug support */
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
int acpi_unmap_cpu(int cpu);
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+void acpi_set_processor_mapping(void);
+
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
#endif
@@ -634,6 +642,11 @@
return NULL;
}
+static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev)
+{
+ return NULL;
+}
+
static inline void acpi_early_init(void) { }
static inline void acpi_subsystem_init(void) { }
@@ -751,6 +764,12 @@
#endif /* !CONFIG_ACPI */
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+int acpi_ioapic_add(acpi_handle root);
+#else
+static inline int acpi_ioapic_add(acpi_handle root) { return 0; }
+#endif
+
#ifdef CONFIG_ACPI
void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
u32 pm1a_ctrl, u32 pm1b_ctrl));
@@ -1074,4 +1093,16 @@
static inline void acpi_table_upgrade(void) { }
#endif
+#if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG)
+extern bool acpi_has_watchdog(void);
+#else
+static inline bool acpi_has_watchdog(void) { return false; }
+#endif
+
+#ifdef CONFIG_ACPI_SPCR_TABLE
+int parse_spcr(bool earlycon);
+#else
+static inline int parse_spcr(bool earlycon) { return 0; }
+#endif
+
#endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
new file mode 100644
index 0000000..0e32dac
--- /dev/null
+++ b/include/linux/acpi_iort.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016, Semihalf
+ * Author: Tomasz Nowicki <tn@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ACPI_IORT_H__
+#define __ACPI_IORT_H__
+
+#include <linux/acpi.h>
+#include <linux/fwnode.h>
+#include <linux/irqdomain.h>
+
+int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+void iort_deregister_domain_token(int trans_id);
+struct fwnode_handle *iort_find_domain_token(int trans_id);
+#ifdef CONFIG_ACPI_IORT
+void acpi_iort_init(void);
+u32 iort_msi_map_rid(struct device *dev, u32 req_id);
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+#else
+static inline void acpi_iort_init(void) { }
+static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
+{ return req_id; }
+static inline struct irq_domain *iort_get_device_domain(struct device *dev,
+ u32 req_id)
+{ return NULL; }
+#endif
+
+#endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 3d8dcdd..d143c13 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -53,8 +53,14 @@
AMBA_VENDOR_ST = 0x80,
AMBA_VENDOR_QCOM = 0x51,
AMBA_VENDOR_LSI = 0xb6,
+ AMBA_VENDOR_LINUX = 0xfe, /* This value is not official */
};
+/* This is used to generate pseudo-ID for AMBA device */
+#define AMBA_LINUX_ID(conf, rev, part) \
+ (((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \
+ AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff))
+
extern struct bus_type amba_bustype;
#define to_amba_device(d) container_of(d, struct amba_device, dev)
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index d76a19b..ad0965e 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -104,6 +104,15 @@
#define UART01x_FR_CTS 0x001
#define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY)
+/*
+ * Some bits of Flag Register on ZTE device have different position from
+ * standard ones.
+ */
+#define ZX_UART01x_FR_BUSY 0x100
+#define ZX_UART01x_FR_DSR 0x008
+#define ZX_UART01x_FR_CTS 0x002
+#define ZX_UART011_FR_RI 0x001
+
#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */
#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */
#define UART011_CR_OUT2 0x2000 /* OUT2 */
diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h
index 5a4d664..bd25605 100644
--- a/include/linux/atmel_serial.h
+++ b/include/linux/atmel_serial.h
@@ -118,6 +118,8 @@
#define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */
#define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */
+#define ATMEL_US_FP_OFFSET 16 /* Fractional Part */
+#define ATMEL_US_FP_MASK 0x7
#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register for USART */
#define ATMEL_UA_RTOR 0x28 /* Receiver Time-out Register for UART */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 598bc99..3b77588 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -339,6 +339,24 @@
return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits);
}
+/*
+ * bitmap_from_u64 - Check and swap words within u64.
+ * @mask: source bitmap
+ * @dst: destination bitmap
+ *
+ * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*]
+ * to read u64 mask, we will get the wrong word.
+ * That is "(u32 *)(&val)[0]" gets the upper 32 bits,
+ * but we expect the lower 32-bits of u64.
+ */
+static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
+{
+ dst[0] = mask & ULONG_MAX;
+
+ if (sizeof(mask) > sizeof(unsigned long))
+ dst[1] = mask >> 32;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __LINUX_BITMAP_H */
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 5261751..5f52709 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -32,6 +32,7 @@
* CAN common private data
*/
struct can_priv {
+ struct net_device *dev;
struct can_device_stats can_stats;
struct can_bittiming bittiming, data_bittiming;
@@ -47,7 +48,7 @@
u32 ctrlmode_static; /* static enabled options for driver/hardware */
int restart_ms;
- struct timer_list restart_timer;
+ struct delayed_work restart_work;
int (*do_set_bittiming)(struct net_device *dev);
int (*do_set_data_bittiming)(struct net_device *dev);
diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h
index 82c3d3b..138bbf7 100644
--- a/include/linux/cec-funcs.h
+++ b/include/linux/cec-funcs.h
@@ -162,10 +162,11 @@
/* One Touch Record Feature */
-static inline void cec_msg_record_off(struct cec_msg *msg)
+static inline void cec_msg_record_off(struct cec_msg *msg, bool reply)
{
msg->len = 2;
msg->msg[1] = CEC_MSG_RECORD_OFF;
+ msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
}
struct cec_op_arib_data {
@@ -227,7 +228,7 @@
if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
*msg++ = (digital->channel.channel_number_fmt << 2) |
(digital->channel.major >> 8);
- *msg++ = digital->channel.major && 0xff;
+ *msg++ = digital->channel.major & 0xff;
*msg++ = digital->channel.minor >> 8;
*msg++ = digital->channel.minor & 0xff;
*msg++ = 0;
@@ -323,6 +324,7 @@
}
static inline void cec_msg_record_on(struct cec_msg *msg,
+ bool reply,
const struct cec_op_record_src *rec_src)
{
switch (rec_src->type) {
@@ -346,6 +348,7 @@
rec_src->ext_phys_addr.phys_addr);
break;
}
+ msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
}
static inline void cec_ops_record_on(const struct cec_msg *msg,
@@ -1141,6 +1144,75 @@
msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0;
}
+static inline void cec_msg_vendor_command(struct cec_msg *msg,
+ __u8 size, const __u8 *vendor_cmd)
+{
+ if (size > 14)
+ size = 14;
+ msg->len = 2 + size;
+ msg->msg[1] = CEC_MSG_VENDOR_COMMAND;
+ memcpy(msg->msg + 2, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command(const struct cec_msg *msg,
+ __u8 *size,
+ const __u8 **vendor_cmd)
+{
+ *size = msg->len - 2;
+
+ if (*size > 14)
+ *size = 14;
+ *vendor_cmd = msg->msg + 2;
+}
+
+static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg,
+ __u32 vendor_id, __u8 size,
+ const __u8 *vendor_cmd)
+{
+ if (size > 11)
+ size = 11;
+ msg->len = 5 + size;
+ msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID;
+ msg->msg[2] = vendor_id >> 16;
+ msg->msg[3] = (vendor_id >> 8) & 0xff;
+ msg->msg[4] = vendor_id & 0xff;
+ memcpy(msg->msg + 5, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg,
+ __u32 *vendor_id, __u8 *size,
+ const __u8 **vendor_cmd)
+{
+ *size = msg->len - 5;
+
+ if (*size > 11)
+ *size = 11;
+ *vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
+ *vendor_cmd = msg->msg + 5;
+}
+
+static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg,
+ __u8 size,
+ const __u8 *rc_code)
+{
+ if (size > 14)
+ size = 14;
+ msg->len = 2 + size;
+ msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN;
+ memcpy(msg->msg + 2, rc_code, size);
+}
+
+static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg,
+ __u8 *size,
+ const __u8 **rc_code)
+{
+ *size = msg->len - 2;
+
+ if (*size > 14)
+ *size = 14;
+ *rc_code = msg->msg + 2;
+}
+
static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg)
{
msg->len = 2;
@@ -1277,7 +1349,7 @@
msg->len += 4;
msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) |
(ui_cmd->channel_identifier.major >> 8);
- msg->msg[4] = ui_cmd->channel_identifier.major && 0xff;
+ msg->msg[4] = ui_cmd->channel_identifier.major & 0xff;
msg->msg[5] = ui_cmd->channel_identifier.minor >> 8;
msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff;
break;
diff --git a/include/linux/cec.h b/include/linux/cec.h
index b3e2289..851968e 100644
--- a/include/linux/cec.h
+++ b/include/linux/cec.h
@@ -364,7 +364,7 @@
* @num_log_addrs: how many logical addresses should be claimed. Set by the
* caller.
* @vendor_id: the vendor ID of the device. Set by the caller.
- * @flags: set to 0.
+ * @flags: flags.
* @osd_name: the OSD name of the device. Set by the caller.
* @primary_device_type: the primary device type for each logical address.
* Set by the caller.
@@ -389,6 +389,9 @@
__u8 features[CEC_MAX_LOG_ADDRS][12];
};
+/* Allow a fallback to unregistered */
+#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK (1 << 0)
+
/* Events */
/* Event that occurs when the adapter state changes */
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 385d62e..2a5982c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -232,8 +232,9 @@
int (*cpu_id)(struct coresight_device *csdev);
int (*trace_id)(struct coresight_device *csdev);
int (*enable)(struct coresight_device *csdev,
- struct perf_event_attr *attr, u32 mode);
- void (*disable)(struct coresight_device *csdev);
+ struct perf_event *event, u32 mode);
+ void (*disable)(struct coresight_device *csdev,
+ struct perf_event *event);
};
struct coresight_ops {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 797d9c8..7572d9e 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -61,17 +61,8 @@
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
- * not handling interrupts, soon dead.
- * Called on the dying cpu, interrupts
- * are already disabled. Must not
- * sleep, must not fail */
#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
* lock is dropped */
-#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running.
- * Called on the new cpu, just before
- * enabling interrupts. Must not sleep,
- * must not fail */
#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly,
* perhaps due to preemption. */
@@ -86,9 +77,6 @@
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
-#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN)
-
#ifdef CONFIG_SMP
extern bool cpuhp_tasks_frozen;
@@ -228,7 +216,11 @@
#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_PM_SLEEP_SMP
-extern int disable_nonboot_cpus(void);
+extern int freeze_secondary_cpus(int primary);
+static inline int disable_nonboot_cpus(void)
+{
+ return freeze_secondary_cpus(0);
+}
extern void enable_nonboot_cpus(void);
#else /* !CONFIG_PM_SLEEP_SMP */
static inline int disable_nonboot_cpus(void) { return 0; }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 34bd805..7b6c446 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -16,15 +16,40 @@
CPUHP_PERF_SUPERH,
CPUHP_X86_HPET_DEAD,
CPUHP_X86_APB_DEAD,
+ CPUHP_VIRT_NET_DEAD,
+ CPUHP_SLUB_DEAD,
+ CPUHP_MM_WRITEBACK_DEAD,
+ CPUHP_SOFTIRQ_DEAD,
+ CPUHP_NET_MVNETA_DEAD,
+ CPUHP_CPUIDLE_DEAD,
+ CPUHP_ARM64_FPSIMD_DEAD,
+ CPUHP_ARM_OMAP_WAKE_DEAD,
+ CPUHP_IRQ_POLL_DEAD,
+ CPUHP_BLOCK_SOFTIRQ_DEAD,
+ CPUHP_VIRT_SCSI_DEAD,
+ CPUHP_ACPI_CPUDRV_DEAD,
+ CPUHP_S390_PFAULT_DEAD,
+ CPUHP_BLK_MQ_DEAD,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,
CPUHP_PROFILE_PREPARE,
CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE,
+ CPUHP_RELAY_PREPARE,
+ CPUHP_SLAB_PREPARE,
+ CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
+ CPUHP_CPUIDLE_COUPLED_PREPARE,
+ CPUHP_POWERPC_PMAC_PREPARE,
+ CPUHP_POWERPC_MMU_CTX_PREPARE,
CPUHP_NOTIFY_PREPARE,
+ CPUHP_ARM_SHMOBILE_SCU_PREPARE,
+ CPUHP_SH_SH3X_PREPARE,
+ CPUHP_BLK_MQ_PREPARE,
CPUHP_TIMERS_DEAD,
+ CPUHP_NOTF_ERR_INJ_PREPARE,
+ CPUHP_MIPS_SOC_PREPARE,
CPUHP_BRINGUP_CPU,
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
@@ -47,6 +72,8 @@
CPUHP_AP_PERF_METAG_STARTING,
CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
+ CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
+ CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
@@ -70,7 +97,6 @@
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_X86_TBOOT_DYING,
- CPUHP_AP_NOTIFY_STARTING,
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
CPUHP_AP_ONLINE_IDLE,
@@ -101,7 +127,7 @@
int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke,
int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu));
+ int (*teardown)(unsigned int cpu), bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
@@ -118,7 +144,7 @@
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu))
{
- return __cpuhp_setup_state(state, name, true, startup, teardown);
+ return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
/**
@@ -137,7 +163,66 @@
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu))
{
- return __cpuhp_setup_state(state, name, false, startup, teardown);
+ return __cpuhp_setup_state(state, name, false, startup, teardown,
+ false);
+}
+
+/**
+ * cpuhp_setup_state_multi - Add callbacks for multi state
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback.
+ * @startup: startup callback function
+ * @teardown: teardown callback function
+ *
+ * Sets the internal multi_instance flag and prepares a state to work as a multi
+ * instance callback. No callbacks are invoked at this point. The callbacks are
+ * invoked once an instance for this state are registered via
+ * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+ */
+static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu,
+ struct hlist_node *node),
+ int (*teardown)(unsigned int cpu,
+ struct hlist_node *node))
+{
+ return __cpuhp_setup_state(state, name, false,
+ (void *) startup,
+ (void *) teardown, true);
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke);
+
+/**
+ * cpuhp_state_add_instance - Add an instance for a state and invoke startup
+ * callback.
+ * @state: The state for which the instance is installed
+ * @node: The node for this individual state.
+ *
+ * Installs the instance for the @state and invokes the startup callback on
+ * the present cpus which have already reached the @state. The @state must have
+ * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+ */
+static inline int cpuhp_state_add_instance(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_add_instance(state, node, true);
+}
+
+/**
+ * cpuhp_state_add_instance_nocalls - Add an instance for a state without
+ * invoking the startup callback.
+ * @state: The state for which the instance is installed
+ * @node: The node for this individual state.
+ *
+ * Installs the instance for the @state The @state must have been earlier
+ * marked as multi-instance by @cpuhp_setup_state_multi.
+ */
+static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_add_instance(state, node, false);
}
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
@@ -164,6 +249,51 @@
__cpuhp_remove_state(state, false);
}
+/**
+ * cpuhp_remove_multi_state - Remove hotplug multi state callback
+ * @state: The state for which the calls are removed
+ *
+ * Removes the callback functions from a multi state. This is the reverse of
+ * cpuhp_setup_state_multi(). All instances should have been removed before
+ * invoking this function.
+ */
+static inline void cpuhp_remove_multi_state(enum cpuhp_state state)
+{
+ __cpuhp_remove_state(state, false);
+}
+
+int __cpuhp_state_remove_instance(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke);
+
+/**
+ * cpuhp_state_remove_instance - Remove hotplug instance from state and invoke
+ * the teardown callback
+ * @state: The state from which the instance is removed
+ * @node: The node for this individual state.
+ *
+ * Removes the instance and invokes the teardown callback on the present cpus
+ * which have already reached the @state.
+ */
+static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_remove_instance(state, node, true);
+}
+
+/**
+ * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
+ * without invoking the reatdown callback
+ * @state: The state from which the instance is removed
+ * @node: The node for this individual state.
+ *
+ * Removes the instance without invoking the teardown callback.
+ */
+static inline int cpuhp_state_remove_instance_nocalls(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_remove_instance(state, node, false);
+}
+
#ifdef CONFIG_SMP
void cpuhp_online_idle(enum cpuhp_state state);
#else
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 1438e23..4d3f0d1 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -45,6 +45,23 @@
extern struct srcu_struct debugfs_srcu;
+/**
+ * debugfs_real_fops - getter for the real file operation
+ * @filp: a pointer to a struct file
+ *
+ * Must only be called under the protection established by
+ * debugfs_use_file_start().
+ */
+static inline const struct file_operations *debugfs_real_fops(struct file *filp)
+ __must_hold(&debugfs_srcu)
+{
+ /*
+ * Neither the pointer to the struct file_operations, nor its
+ * contents ever change -- srcu_dereference() is not needed here.
+ */
+ return filp->f_path.dentry->d_fsdata;
+}
+
#if defined(CONFIG_DEBUG_FS)
struct dentry *debugfs_create_file(const char *name, umode_t mode,
diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h
index 0a83a1e..4db00b0 100644
--- a/include/linux/devfreq-event.h
+++ b/include/linux/devfreq-event.h
@@ -148,11 +148,6 @@
return -EINVAL;
}
-static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev)
-{
- return ERR_PTR(-EINVAL);
-}
-
static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(
struct device *dev, int index)
{
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 66533e1..dc69df0 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -718,7 +718,7 @@
#define dma_mmap_writecombine dma_mmap_wc
#endif
-#ifdef CONFIG_NEED_DMA_MAP_STATE
+#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG)
#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME
#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME)
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index f2e538a..ccfd0c3 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -40,8 +40,13 @@
};
/* Export to the platform drivers */
+#if IS_ENABLED(CONFIG_DW_DMAC_CORE)
int dw_dma_probe(struct dw_dma_chip *chip);
int dw_dma_remove(struct dw_dma_chip *chip);
+#else
+static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
+static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; }
+#endif /* CONFIG_DW_DMAC_CORE */
/* DMA API extensions */
struct dw_desc;
diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h
index aaff68e..197eec6 100644
--- a/include/linux/dma/hsu.h
+++ b/include/linux/dma/hsu.h
@@ -41,8 +41,7 @@
/* Export to the internal users */
int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr,
u32 *status);
-irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
- u32 status);
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status);
/* Export to the platform drivers */
int hsu_dma_probe(struct hsu_dma_chip *chip);
@@ -53,10 +52,10 @@
{
return 0;
}
-static inline irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip,
- unsigned short nr, u32 status)
+static inline int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
+ u32 status)
{
- return IRQ_NONE;
+ return 0;
}
static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; }
static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0148a30..2d08948 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -20,6 +20,7 @@
#include <linux/ioport.h>
#include <linux/pfn.h>
#include <linux/pstore.h>
+#include <linux/range.h>
#include <linux/reboot.h>
#include <linux/uuid.h>
#include <linux/screen_info.h>
@@ -37,6 +38,7 @@
#define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1)))
#define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1)))
#define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1)))
#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1)))
typedef unsigned long efi_status_t;
@@ -678,6 +680,18 @@
unsigned long tables;
} efi_system_table_t;
+/*
+ * Architecture independent structure for describing a memory map for the
+ * benefit of efi_memmap_init_early(), saving us the need to pass four
+ * parameters.
+ */
+struct efi_memory_map_data {
+ phys_addr_t phys_map;
+ unsigned long size;
+ unsigned long desc_version;
+ unsigned long desc_size;
+};
+
struct efi_memory_map {
phys_addr_t phys_map;
void *map;
@@ -685,6 +699,12 @@
int nr_map;
unsigned long desc_version;
unsigned long desc_size;
+ bool late;
+};
+
+struct efi_mem_range {
+ struct range range;
+ u64 attribute;
};
struct efi_fdt_params {
@@ -909,6 +929,16 @@
}
#endif
extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+
+extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
+extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
+extern void __init efi_memmap_unmap(void);
+extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map);
+extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
+ struct range *range);
+extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
+ void *buf, struct efi_mem_range *mem);
+
extern int efi_config_init(efi_config_table_type_t *arch_tables);
#ifdef CONFIG_EFI_ESRT
extern void __init efi_esrt_init(void);
@@ -924,6 +954,7 @@
extern int __init efi_uart_console_only (void);
extern u64 efi_mem_desc_end(efi_memory_desc_t *md);
extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
+extern void efi_mem_reserve(phys_addr_t addr, u64 size);
extern void efi_initialize_iomem_resources(struct resource *code_resource,
struct resource *data_resource, struct resource *bss_resource);
extern void efi_reserve_boot_services(void);
@@ -1136,12 +1167,6 @@
};
struct efivars {
- /*
- * ->lock protects two things:
- * 1) efivarfs_list and efivars_sysfs_list
- * 2) ->ops calls
- */
- spinlock_t lock;
struct kset *kset;
struct kobject *kobject;
const struct efivar_operations *ops;
@@ -1282,8 +1307,8 @@
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
void *data, bool duplicates, struct list_head *head);
-void efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
-void efivar_entry_remove(struct efivar_entry *entry);
+int efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
+int efivar_entry_remove(struct efivar_entry *entry);
int __efivar_entry_delete(struct efivar_entry *entry);
int efivar_entry_delete(struct efivar_entry *entry);
@@ -1300,7 +1325,7 @@
int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
bool block, unsigned long size, void *data);
-void efivar_entry_iter_begin(void);
+int efivar_entry_iter_begin(void);
void efivar_entry_iter_end(void);
int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
@@ -1336,7 +1361,6 @@
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_runtime_map_init(struct kobject *);
-void efi_runtime_map_setup(void *, int, u32);
int efi_get_runtime_map_size(void);
int efi_get_runtime_map_desc_size(void);
int efi_runtime_map_copy(void *buf, size_t bufsz);
@@ -1346,9 +1370,6 @@
return 0;
}
-static inline void
-efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
-
static inline int efi_get_runtime_map_size(void)
{
return 0;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 58205f3..7268ed0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,7 @@
#define FS_PRIO_1 1 /* fanotify content based access control */
#define FS_PRIO_2 2 /* fanotify pre-content access */
unsigned int priority;
+ bool shutdown; /* group is being shut down, don't queue more events */
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
struct mutex mark_mutex; /* protect marks_list */
@@ -179,7 +180,6 @@
spinlock_t access_lock;
struct list_head access_list;
wait_queue_head_t access_waitq;
- atomic_t bypass_perm;
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
int f_flags;
unsigned int max_marks;
@@ -292,6 +292,8 @@
extern void fsnotify_get_group(struct fsnotify_group *group);
/* drop reference on a group from fsnotify_alloc_group */
extern void fsnotify_put_group(struct fsnotify_group *group);
+/* group destruction begins, stop queuing new events */
+extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
/* destroy group */
extern void fsnotify_destroy_group(struct fsnotify_group *group);
/* fasync handler function */
@@ -304,8 +306,6 @@
struct fsnotify_event *event,
int (*merge)(struct list_head *,
struct fsnotify_event *));
-/* Remove passed event from groups notification queue */
-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
/* true if the group notification queue is empty */
extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
/* return, but do not dequeue the first event on the notification queue */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7d565af..6f93ac4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -795,7 +795,12 @@
unsigned long func;
unsigned long long calltime;
unsigned long long subtime;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
unsigned long fp;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+ unsigned long *retp;
+#endif
};
/*
@@ -807,7 +812,10 @@
extern int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
- unsigned long frame_pointer);
+ unsigned long frame_pointer, unsigned long *retp);
+
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp);
/*
* Sometimes we don't want to trace a function with the function
@@ -870,6 +878,13 @@
return -1;
}
+static inline unsigned long
+ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
+ unsigned long *retp)
+{
+ return ret;
+}
+
static inline void pause_graph_tracing(void) { }
static inline void unpause_graph_tracing(void) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b10954a..cd184bd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@
HV_SIGNAL_POLICY_EXPLICIT,
};
+enum hv_numa_policy {
+ HV_BALANCED = 0,
+ HV_LOCALIZED,
+};
+
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
@@ -701,9 +706,6 @@
};
struct vmbus_channel {
- /* Unique channel id */
- int id;
-
struct list_head listentry;
struct hv_device *device_obj;
@@ -850,6 +852,43 @@
* ring lock to preserve the current behavior.
*/
bool acquire_ring_lock;
+ /*
+ * For performance critical channels (storage, networking
+ * etc,), Hyper-V has a mechanism to enhance the throughput
+ * at the expense of latency:
+ * When the host is to be signaled, we just set a bit in a shared page
+ * and this bit will be inspected by the hypervisor within a certain
+ * window and if the bit is set, the host will be signaled. The window
+ * of time is the monitor latency - currently around 100 usecs. This
+ * mechanism improves throughput by:
+ *
+ * A) Making the host more efficient - each time it wakes up,
+ * potentially it will process morev number of packets. The
+ * monitor latency allows a batch to build up.
+ * B) By deferring the hypercall to signal, we will also minimize
+ * the interrupts.
+ *
+ * Clearly, these optimizations improve throughput at the expense of
+ * latency. Furthermore, since the channel is shared for both
+ * control and data messages, control messages currently suffer
+ * unnecessary latency adversley impacting performance and boot
+ * time. To fix this issue, permit tagging the channel as being
+ * in "low latency" mode. In this mode, we will bypass the monitor
+ * mechanism.
+ */
+ bool low_latency;
+
+ /*
+ * NUMA distribution policy:
+ * We support teo policies:
+ * 1) Balanced: Here all performance critical channels are
+ * distributed evenly amongst all the NUMA nodes.
+ * This policy will be the default policy.
+ * 2) Localized: All channels of a given instance of a
+ * performance critical service will be assigned CPUs
+ * within a selected NUMA node.
+ */
+ enum hv_numa_policy affinity_policy;
};
@@ -870,6 +909,12 @@
c->signal_policy = policy;
}
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+ enum hv_numa_policy policy)
+{
+ c->affinity_policy = policy;
+}
+
static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
{
c->batched_reading = state;
@@ -891,6 +936,16 @@
c->outbound.ring_buffer->pending_send_sz = size;
}
+static inline void set_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = true;
+}
+
+static inline void clear_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = false;
+}
+
void vmbus_onmessage(void *context);
int vmbus_request_offers(void);
@@ -1257,6 +1312,27 @@
0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
/*
+ * Linux doesn't support the 3 devices: the first two are for
+ * Automatic Virtual Machine Activation, and the third is for
+ * Remote Desktop Virtualization.
+ * {f8e65716-3cb3-4a06-9a60-1889c5cccab5}
+ * {3375baf4-9e15-4b30-b765-67acb10d607b}
+ * {276aacf4-ac15-426c-98dd-7521ad3f01fe}
+ */
+
+#define HV_AVMA1_GUID \
+ .guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
+ 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
+
+#define HV_AVMA2_GUID \
+ .guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
+ 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
+
+#define HV_RDV_GUID \
+ .guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
+ 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
+
+/*
* Common header for Hyper-V ICs
*/
@@ -1344,6 +1420,15 @@
u8 flags;
} __packed;
+struct ictimesync_ref_data {
+ u64 parenttime;
+ u64 vmreferencetime;
+ u8 flags;
+ char leapflags;
+ char stratum;
+ u8 reserved[3];
+} __packed;
+
struct hyperv_service_callback {
u8 msg_type;
char *log_msg;
@@ -1357,6 +1442,9 @@
struct icmsg_negotiate *, u8 *, int,
int);
+void hv_event_tasklet_disable(struct vmbus_channel *channel);
+void hv_event_tasklet_enable(struct vmbus_channel *channel);
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
/*
diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
new file mode 100644
index 0000000..3fa5ef2
--- /dev/null
+++ b/include/linux/hypervisor.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_HYPEVISOR_H
+#define __LINUX_HYPEVISOR_H
+
+/*
+ * Generic Hypervisor support
+ * Juergen Gross <jgross@suse.com>
+ */
+
+#ifdef CONFIG_HYPERVISOR_GUEST
+#include <asm/hypervisor.h>
+#else
+static inline void hypervisor_pin_vcpu(int cpu)
+{
+}
+#endif
+
+#endif /* __LINUX_HYPEVISOR_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f8..325f649 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,8 @@
#include <net/net_namespace.h>
#include <linux/sched/rt.h>
+#include <asm/thread_info.h>
+
#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
@@ -183,12 +185,21 @@
# define INIT_KASAN(tsk)
#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+# define INIT_TASK_TI(tsk) \
+ .thread_info = INIT_THREAD_INFO(tsk), \
+ .stack_refcount = ATOMIC_INIT(1),
+#else
+# define INIT_TASK_TI(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
*/
#define INIT_TASK(tsk) \
{ \
+ INIT_TASK_TI(tsk) \
.state = 0, \
.stack = init_stack, \
.usage = ATOMIC_INIT(2), \
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index b6683f0..72f0721 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -278,7 +278,8 @@
extern int
irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs);
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);
#else /* CONFIG_SMP */
@@ -311,11 +312,18 @@
return 0;
}
-static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+static inline struct cpumask *
+irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
{
- *nr_vecs = 1;
return NULL;
}
+
+static inline int
+irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+ return maxvec;
+}
+
#endif /* CONFIG_SMP */
/*
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0ac26c8..e798755 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -916,12 +916,20 @@
unsigned int clr, unsigned int set);
struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
-int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
- int num_ct, const char *name,
- irq_flow_handler_t handler,
- unsigned int clr, unsigned int set,
- enum irq_gc_flags flags);
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+ int num_ct, const char *name,
+ irq_flow_handler_t handler,
+ unsigned int clr, unsigned int set,
+ enum irq_gc_flags flags);
+
+#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \
+ handler, clr, set, flags) \
+({ \
+ MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \
+ __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\
+ handler, clr, set, flags); \
+})
static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
{
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 99ac022..8361c8d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -430,9 +430,9 @@
};
struct irq_domain;
-struct device_node;
+struct fwnode_handle;
int its_cpu_init(void);
-int its_init(struct device_node *node, struct rdists *rdists,
+int its_init(struct fwnode_handle *handle, struct rdists *rdists,
struct irq_domain *domain);
static inline bool gic_enable_sre(void)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index b51beeb..c9be579 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -2,6 +2,7 @@
#define _LINUX_IRQDESC_H
#include <linux/rcupdate.h>
+#include <linux/kobject.h>
/*
* Core internal functions to deal with irq descriptors
@@ -43,6 +44,7 @@
* @force_resume_depth: number of irqactions on a irq descriptor with
* IRQF_FORCE_RESUME set
* @rcu: rcu head for delayed free
+ * @kobj: kobject used to represent this struct in sysfs
* @dir: /proc/irq/ procfs entry
* @name: flow handler name for /proc/interrupts output
*/
@@ -88,6 +90,7 @@
#endif
#ifdef CONFIG_SPARSE_IRQ
struct rcu_head rcu;
+ struct kobject kobj;
#endif
int parent_irq;
struct module *owner;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 661af56..a0547c5 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -21,6 +21,8 @@
*
* DEFINE_STATIC_KEY_TRUE(key);
* DEFINE_STATIC_KEY_FALSE(key);
+ * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+ * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
* static_branch_likely()
* static_branch_unlikely()
*
@@ -267,9 +269,25 @@
#define DEFINE_STATIC_KEY_TRUE(name) \
struct static_key_true name = STATIC_KEY_TRUE_INIT
+#define DECLARE_STATIC_KEY_TRUE(name) \
+ extern struct static_key_true name
+
#define DEFINE_STATIC_KEY_FALSE(name) \
struct static_key_false name = STATIC_KEY_FALSE_INIT
+#define DECLARE_STATIC_KEY_FALSE(name) \
+ extern struct static_key_false name
+
+#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \
+ struct static_key_true name[count] = { \
+ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \
+ }
+
+#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \
+ struct static_key_false name[count] = { \
+ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \
+ }
+
extern bool ____wrong_branch_error(void);
#define static_key_enabled(x) \
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..74fd6f0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -259,17 +259,14 @@
extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(int state);
__printf(1, 2)
-void panic(const char *fmt, ...)
- __noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
void nmi_panic(struct pt_regs *regs, const char *msg);
extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
extern int oops_may_print(void);
-void do_exit(long error_code)
- __noreturn;
-void complete_and_exit(struct completion *, long)
- __noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
/* Internal, do not use. */
int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2b6a204..3ffc69e 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -64,6 +64,13 @@
({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; })
/*
+ * Same as ktime_add(), but avoids undefined behaviour on overflow; however,
+ * this means that you must check the result for overflow yourself.
+ */
+#define ktime_add_unsafe(lhs, rhs) \
+ ({ (ktime_t){ .tv64 = (u64) (lhs).tv64 + (rhs).tv64 }; })
+
+/*
* Add a ktime_t variable and a scalar nanosecond value.
* res = kt + nsval:
*/
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
deleted file mode 100644
index c92ebd1..0000000
--- a/include/linux/lglock.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Specialised local-global spinlock. Can only be declared as global variables
- * to avoid overhead and keep things simple (and we don't want to start using
- * these inside dynamically allocated structures).
- *
- * "local/global locks" (lglocks) can be used to:
- *
- * - Provide fast exclusive access to per-CPU data, with exclusive access to
- * another CPU's data allowed but possibly subject to contention, and to
- * provide very slow exclusive access to all per-CPU data.
- * - Or to provide very fast and scalable read serialisation, and to provide
- * very slow exclusive serialisation of data (not necessarily per-CPU data).
- *
- * Brlocks are also implemented as a short-hand notation for the latter use
- * case.
- *
- * Copyright 2009, 2010, Nick Piggin, Novell Inc.
- */
-#ifndef __LINUX_LGLOCK_H
-#define __LINUX_LGLOCK_H
-
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
-#include <linux/percpu.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define LOCKDEP_INIT_MAP lockdep_init_map
-#else
-#define LOCKDEP_INIT_MAP(a, b, c, d)
-#endif
-
-struct lglock {
- arch_spinlock_t __percpu *lock;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lock_class_key lock_key;
- struct lockdep_map lock_dep_map;
-#endif
-};
-
-#define DEFINE_LGLOCK(name) \
- static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
- = __ARCH_SPIN_LOCK_UNLOCKED; \
- struct lglock name = { .lock = &name ## _lock }
-
-#define DEFINE_STATIC_LGLOCK(name) \
- static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
- = __ARCH_SPIN_LOCK_UNLOCKED; \
- static struct lglock name = { .lock = &name ## _lock }
-
-void lg_lock_init(struct lglock *lg, char *name);
-
-void lg_local_lock(struct lglock *lg);
-void lg_local_unlock(struct lglock *lg);
-void lg_local_lock_cpu(struct lglock *lg, int cpu);
-void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
-void lg_global_lock(struct lglock *lg);
-void lg_global_unlock(struct lglock *lg);
-
-#else
-/* When !CONFIG_SMP, map lglock to spinlock */
-#define lglock spinlock
-#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name)
-#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name)
-#define lg_lock_init(lg, name) spin_lock_init(lg)
-#define lg_local_lock spin_lock
-#define lg_local_unlock spin_unlock
-#define lg_local_lock_cpu(lg, cpu) spin_lock(lg)
-#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg)
-#define lg_global_lock spin_lock
-#define lg_global_unlock spin_unlock
-#endif
-
-#endif
diff --git a/include/linux/list.h b/include/linux/list.h
index 5183138..5809e9a2 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -381,8 +381,11 @@
*
* Note that if the list is empty, it returns NULL.
*/
-#define list_first_entry_or_null(ptr, type, member) \
- (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+#define list_first_entry_or_null(ptr, type, member) ({ \
+ struct list_head *head__ = (ptr); \
+ struct list_head *pos__ = READ_ONCE(head__->next); \
+ pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
+})
/**
* list_next_entry - get the next element in list
diff --git a/include/linux/mcb.h b/include/linux/mcb.h
index ead13d2..4097ac9 100644
--- a/include/linux/mcb.h
+++ b/include/linux/mcb.h
@@ -41,15 +41,17 @@
char name[CHAMELEON_FILENAME_LEN + 1];
int (*get_irq)(struct mcb_device *dev);
};
-#define to_mcb_bus(b) container_of((b), struct mcb_bus, dev)
+
+static inline struct mcb_bus *to_mcb_bus(struct device *dev)
+{
+ return container_of(dev, struct mcb_bus, dev);
+}
/**
* struct mcb_device - MEN Chameleon Bus device
*
- * @bus_list: internal list handling for bus code
* @dev: device in kernel representation
* @bus: mcb bus the device is plugged to
- * @subordinate: subordinate MCBus in case of bridge
* @is_added: flag to check if device is added to bus
* @driver: associated mcb_driver
* @id: mcb device id
@@ -62,10 +64,8 @@
* @memory: memory resource
*/
struct mcb_device {
- struct list_head bus_list;
struct device dev;
struct mcb_bus *bus;
- struct mcb_bus *subordinate;
bool is_added;
struct mcb_driver *driver;
u16 id;
@@ -76,8 +76,13 @@
int rev;
struct resource irq;
struct resource mem;
+ struct device *dma_dev;
};
-#define to_mcb_device(x) container_of((x), struct mcb_device, dev)
+
+static inline struct mcb_device *to_mcb_device(struct device *dev)
+{
+ return container_of(dev, struct mcb_device, dev);
+}
/**
* struct mcb_driver - MEN Chameleon Bus device driver
@@ -95,7 +100,11 @@
void (*remove)(struct mcb_device *mdev);
void (*shutdown)(struct mcb_device *mdev);
};
-#define to_mcb_driver(x) container_of((x), struct mcb_driver, driver)
+
+static inline struct mcb_driver *to_mcb_driver(struct device_driver *drv)
+{
+ return container_of(drv, struct mcb_driver, driver);
+}
static inline void *mcb_get_drvdata(struct mcb_device *dev)
{
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 5430374..722698a 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -3,6 +3,7 @@
#include <linux/major.h>
#include <linux/list.h>
#include <linux/types.h>
+#include <linux/device.h>
/*
* These allocations are managed by device@lanana.org. If you use an
@@ -70,6 +71,13 @@
extern int misc_register(struct miscdevice *misc);
extern void misc_deregister(struct miscdevice *misc);
+/*
+ * Helper macro for drivers that don't do anything special in module init / exit
+ * call. This helps in eleminating of boilerplate code.
+ */
+#define module_misc_device(__misc_device) \
+ module_driver(__misc_device, misc_register, misc_deregister)
+
#define MODULE_ALIAS_MISCDEV(minor) \
MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR) \
"-" __stringify(minor))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef815b9..5f14534 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2019,6 +2019,8 @@
extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
+extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
+ const struct vm_special_mapping *sm);
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags,
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index d351fd3..e5fb813 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -120,5 +120,5 @@
struct rtmsg;
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
- struct rtmsg *rtm, int nowait);
+ struct rtmsg *rtm, int nowait, u32 portid);
#endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3987b64..19a1c0c 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -116,7 +116,7 @@
struct rtmsg;
extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
- struct rtmsg *rtm, int nowait);
+ struct rtmsg *rtm, int nowait, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index e8c81fb..0db320b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -68,7 +68,7 @@
unsigned int nvec_used;
struct device *dev;
struct msi_msg msg;
- const struct cpumask *affinity;
+ struct cpumask *affinity;
union {
/* PCI MSI/X specific data */
@@ -123,7 +123,8 @@
}
#endif /* CONFIG_PCI_MSI */
-struct msi_desc *alloc_msi_entry(struct device *dev);
+struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
+ const struct cpumask *affinity);
void free_msi_entry(struct msi_desc *entry);
void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 26c3302..4341f32 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/init.h>
+#include <linux/errno.h>
/* Definitions used by the flattened device tree */
#define OF_DT_HEADER 0xd00dfeed /* marker */
@@ -66,6 +67,7 @@
int depth, void *data);
extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
int depth, void *data);
+extern int early_init_dt_scan_chosen_stdout(void);
extern void early_init_fdt_scan_reserved_mem(void);
extern void early_init_fdt_reserve_self(void);
extern void early_init_dt_add_memory_arch(u64 base, u64 size);
@@ -94,6 +96,7 @@
extern u64 of_flat_dt_translate_address(unsigned long node);
extern void of_fdt_limit_memory(int limit);
#else /* CONFIG_OF_FLATTREE */
+static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; }
static inline void early_init_fdt_scan_reserved_mem(void) {}
static inline void early_init_fdt_reserve_self(void) {}
static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 113ee62..0f9e567 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -151,7 +151,7 @@
* @flags: padata flags.
*/
struct padata_instance {
- struct notifier_block cpu_notifier;
+ struct hlist_node node;
struct workqueue_struct *wq;
struct parallel_data *pd;
struct padata_cpumask cpumask;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 66a1260..01e8443 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -571,56 +571,57 @@
*/
static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
{
- int ret = 0;
char __user *end = uaddr + size - 1;
if (unlikely(size == 0))
- return ret;
+ return 0;
+ if (unlikely(uaddr > end))
+ return -EFAULT;
/*
* Writing zeroes into userspace here is OK, because we know that if
* the zero gets there, we'll be overwriting it.
*/
- while (uaddr <= end) {
- ret = __put_user(0, uaddr);
- if (ret != 0)
- return ret;
+ do {
+ if (unlikely(__put_user(0, uaddr) != 0))
+ return -EFAULT;
uaddr += PAGE_SIZE;
- }
+ } while (uaddr <= end);
/* Check whether the range spilled into the next page. */
if (((unsigned long)uaddr & PAGE_MASK) ==
((unsigned long)end & PAGE_MASK))
- ret = __put_user(0, end);
+ return __put_user(0, end);
- return ret;
+ return 0;
}
static inline int fault_in_multipages_readable(const char __user *uaddr,
int size)
{
volatile char c;
- int ret = 0;
const char __user *end = uaddr + size - 1;
if (unlikely(size == 0))
- return ret;
+ return 0;
- while (uaddr <= end) {
- ret = __get_user(c, uaddr);
- if (ret != 0)
- return ret;
+ if (unlikely(uaddr > end))
+ return -EFAULT;
+
+ do {
+ if (unlikely(__get_user(c, uaddr) != 0))
+ return -EFAULT;
uaddr += PAGE_SIZE;
- }
+ } while (uaddr <= end);
/* Check whether the range spilled into the next page. */
if (((unsigned long)uaddr & PAGE_MASK) ==
((unsigned long)end & PAGE_MASK)) {
- ret = __get_user(c, end);
- (void)c;
+ return __get_user(c, end);
}
- return ret;
+ (void)c;
+ return 0;
}
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0ab8359..7cc0acb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1126,6 +1126,7 @@
int pci_enable_resources(struct pci_dev *, int mask);
void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
int (*)(const struct pci_dev *, u8, u8));
+struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res);
#define HAVE_PCI_REQ_REGIONS 2
int __must_check pci_request_regions(struct pci_dev *, const char *);
int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *);
@@ -1300,6 +1301,7 @@
unsigned int max_vecs, unsigned int flags);
void pci_free_irq_vectors(struct pci_dev *dev);
int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
#else
static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1342,6 +1344,11 @@
return -EINVAL;
return dev->irq;
}
+static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
+ int vec)
+{
+ return cpu_possible_mask;
+}
#endif
#ifdef CONFIG_PCIEPORTBUS
@@ -1542,6 +1549,9 @@
int enable)
{ return 0; }
+static inline struct resource *pci_find_resource(struct pci_dev *dev,
+ struct resource *res)
+{ return NULL; }
static inline int pci_request_regions(struct pci_dev *dev, const char *res_name)
{ return -EIO; }
static inline void pci_release_regions(struct pci_dev *dev) { }
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c2fa3ec..5b2e615 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -10,32 +10,122 @@
struct percpu_rw_semaphore {
struct rcu_sync rss;
- unsigned int __percpu *fast_read_ctr;
+ unsigned int __percpu *read_count;
struct rw_semaphore rw_sem;
- atomic_t slow_read_ctr;
- wait_queue_head_t write_waitq;
+ wait_queue_head_t writer;
+ int readers_block;
};
-extern void percpu_down_read(struct percpu_rw_semaphore *);
-extern int percpu_down_read_trylock(struct percpu_rw_semaphore *);
-extern void percpu_up_read(struct percpu_rw_semaphore *);
+#define DEFINE_STATIC_PERCPU_RWSEM(name) \
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
+static struct percpu_rw_semaphore name = { \
+ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
+ .read_count = &__percpu_rwsem_rc_##name, \
+ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
+ .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \
+}
+
+extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
+extern void __percpu_up_read(struct percpu_rw_semaphore *);
+
+static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
+{
+ might_sleep();
+
+ rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+ preempt_disable();
+ /*
+ * We are in an RCU-sched read-side critical section, so the writer
+ * cannot both change sem->state from readers_fast and start checking
+ * counters while we are here. So if we see !sem->state, we know that
+ * the writer won't be checking until we're past the preempt_enable()
+ * and that one the synchronize_sched() is done, the writer will see
+ * anything we did within this RCU-sched read-size critical section.
+ */
+ __this_cpu_inc(*sem->read_count);
+ if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+ __percpu_down_read(sem, false); /* Unconditional memory barrier */
+ barrier();
+ /*
+ * The barrier() prevents the compiler from
+ * bleeding the critical section out.
+ */
+}
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+ percpu_down_read_preempt_disable(sem);
+ preempt_enable();
+}
+
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+ int ret = 1;
+
+ preempt_disable();
+ /*
+ * Same as in percpu_down_read().
+ */
+ __this_cpu_inc(*sem->read_count);
+ if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+ ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+ preempt_enable();
+ /*
+ * The barrier() from preempt_enable() prevents the compiler from
+ * bleeding the critical section out.
+ */
+
+ if (ret)
+ rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+
+ return ret;
+}
+
+static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
+{
+ /*
+ * The barrier() prevents the compiler from
+ * bleeding the critical section out.
+ */
+ barrier();
+ /*
+ * Same as in percpu_down_read().
+ */
+ if (likely(rcu_sync_is_idle(&sem->rss)))
+ __this_cpu_dec(*sem->read_count);
+ else
+ __percpu_up_read(sem); /* Unconditional memory barrier */
+ preempt_enable();
+
+ rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ preempt_disable();
+ percpu_up_read_preempt_enable(sem);
+}
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
const char *, struct lock_class_key *);
+
extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
-#define percpu_init_rwsem(brw) \
+#define percpu_init_rwsem(sem) \
({ \
static struct lock_class_key rwsem_key; \
- __percpu_init_rwsem(brw, #brw, &rwsem_key); \
+ __percpu_init_rwsem(sem, #sem, &rwsem_key); \
})
-
#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
+#define percpu_rwsem_assert_held(sem) \
+ lockdep_assert_held(&(sem)->rw_sem)
+
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index e188438..8462da2 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -14,7 +14,7 @@
#include <linux/interrupt.h>
#include <linux/perf_event.h>
-
+#include <linux/sysfs.h>
#include <asm/cputype.h>
/*
@@ -77,6 +77,13 @@
struct arm_pmu *percpu_pmu;
};
+enum armpmu_attr_groups {
+ ARMPMU_ATTR_GROUP_COMMON,
+ ARMPMU_ATTR_GROUP_EVENTS,
+ ARMPMU_ATTR_GROUP_FORMATS,
+ ARMPMU_NR_ATTR_GROUPS
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
@@ -109,8 +116,10 @@
DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
struct platform_device *plat_device;
struct pmu_hw_events __percpu *hw_events;
- struct list_head entry;
+ struct hlist_node node;
struct notifier_block cpu_pm_nb;
+ /* the attr_groups array must be NULL-terminated */
+ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
@@ -151,6 +160,8 @@
const struct of_device_id *of_table,
const struct pmu_probe_info *probe_table);
+#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
+
#endif /* CONFIG_ARM_PMU */
#endif /* __ARM_PMU_H__ */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43c..5c53625 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -510,9 +510,15 @@
struct perf_sample_data *,
struct pt_regs *regs);
-enum perf_group_flag {
- PERF_GROUP_SOFTWARE = 0x1,
-};
+/*
+ * Event capabilities. For event_caps and groups caps.
+ *
+ * PERF_EV_CAP_SOFTWARE: Is a software event.
+ * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
+ * from any CPU in the package where it is active.
+ */
+#define PERF_EV_CAP_SOFTWARE BIT(0)
+#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -568,7 +574,12 @@
struct hlist_node hlist_entry;
struct list_head active_entry;
int nr_siblings;
- int group_flags;
+
+ /* Not serialized. Only written during event initialization. */
+ int event_caps;
+ /* The cumulative AND of all event_caps for events in this group. */
+ int group_caps;
+
struct perf_event *group_leader;
struct pmu *pmu;
void *pmu_private;
@@ -774,6 +785,9 @@
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
#endif
+
+ struct list_head sched_cb_entry;
+ int sched_cb_usage;
};
struct perf_output_handle {
@@ -985,7 +999,7 @@
*/
static inline int is_software_event(struct perf_event *event)
{
- return event->pmu->task_ctx_nr == perf_sw_context;
+ return event->event_caps & PERF_EV_CAP_SOFTWARE;
}
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index d15d8ba..5f0e11e 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -23,6 +23,7 @@
* @dst_id: dst request line
* @m_master: memory master for transfers on allocated channel
* @p_master: peripheral master for transfers on allocated channel
+ * @hs_polarity:set active low polarity of handshake interface
*/
struct dw_dma_slave {
struct device *dma_dev;
@@ -30,6 +31,7 @@
u8 dst_id;
u8 m_master;
u8 p_master;
+ bool hs_polarity;
};
/**
@@ -38,6 +40,7 @@
* @is_private: The device channels should be marked as private and not for
* by the general purpose DMA channel allocator.
* @is_memcpy: The device channels do support memory-to-memory transfers.
+ * @is_nollp: The device channels does not support multi block transfers.
* @chan_allocation_order: Allocate channels starting from 0 or 7
* @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
* @block_size: Maximum block size supported by the controller
@@ -49,6 +52,7 @@
unsigned int nr_channels;
bool is_private;
bool is_memcpy;
+ bool is_nollp;
#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */
#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */
unsigned char chan_allocation_order;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 31fec85..a09fe5c 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -51,6 +51,8 @@
struct mutex lock;
struct dev_power_governor *gov;
struct work_struct power_off_work;
+ struct fwnode_handle *provider; /* Identity of the domain provider */
+ bool has_provider;
const char *name;
atomic_t sd_count; /* Number of subdomains with power "on" */
enum gpd_status status; /* Current state of the domain */
@@ -116,7 +118,6 @@
return to_gpd_data(dev->power.subsys_data->domain_data);
}
-extern struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev);
extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
struct device *dev,
struct gpd_timing_data *td);
@@ -129,6 +130,7 @@
struct generic_pm_domain *target);
extern int pm_genpd_init(struct generic_pm_domain *genpd,
struct dev_power_governor *gov, bool is_off);
+extern int pm_genpd_remove(struct generic_pm_domain *genpd);
extern struct dev_power_governor simple_qos_governor;
extern struct dev_power_governor pm_domain_always_on_gov;
@@ -138,10 +140,6 @@
{
return ERR_PTR(-ENOSYS);
}
-static inline struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev)
-{
- return NULL;
-}
static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd,
struct device *dev,
struct gpd_timing_data *td)
@@ -168,6 +166,10 @@
{
return -ENOSYS;
}
+static inline int pm_genpd_remove(struct generic_pm_domain *genpd)
+{
+ return -ENOTSUPP;
+}
#endif
static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
@@ -192,57 +194,57 @@
unsigned int num_domains;
};
-typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
- void *data);
-
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
-int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
- void *data);
+int of_genpd_add_provider_simple(struct device_node *np,
+ struct generic_pm_domain *genpd);
+int of_genpd_add_provider_onecell(struct device_node *np,
+ struct genpd_onecell_data *data);
void of_genpd_del_provider(struct device_node *np);
-struct generic_pm_domain *of_genpd_get_from_provider(
- struct of_phandle_args *genpdspec);
-
-struct generic_pm_domain *__of_genpd_xlate_simple(
- struct of_phandle_args *genpdspec,
- void *data);
-struct generic_pm_domain *__of_genpd_xlate_onecell(
- struct of_phandle_args *genpdspec,
- void *data);
+extern int of_genpd_add_device(struct of_phandle_args *args,
+ struct device *dev);
+extern int of_genpd_add_subdomain(struct of_phandle_args *parent,
+ struct of_phandle_args *new_subdomain);
+extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
int genpd_dev_pm_attach(struct device *dev);
#else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
-static inline int __of_genpd_add_provider(struct device_node *np,
- genpd_xlate_t xlate, void *data)
+static inline int of_genpd_add_provider_simple(struct device_node *np,
+ struct generic_pm_domain *genpd)
{
- return 0;
+ return -ENOTSUPP;
}
+
+static inline int of_genpd_add_provider_onecell(struct device_node *np,
+ struct genpd_onecell_data *data)
+{
+ return -ENOTSUPP;
+}
+
static inline void of_genpd_del_provider(struct device_node *np) {}
-static inline struct generic_pm_domain *of_genpd_get_from_provider(
- struct of_phandle_args *genpdspec)
+static inline int of_genpd_add_device(struct of_phandle_args *args,
+ struct device *dev)
{
- return NULL;
+ return -ENODEV;
}
-#define __of_genpd_xlate_simple NULL
-#define __of_genpd_xlate_onecell NULL
+static inline int of_genpd_add_subdomain(struct of_phandle_args *parent,
+ struct of_phandle_args *new_subdomain)
+{
+ return -ENODEV;
+}
static inline int genpd_dev_pm_attach(struct device *dev)
{
return -ENODEV;
}
-#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
-static inline int of_genpd_add_provider_simple(struct device_node *np,
- struct generic_pm_domain *genpd)
+static inline
+struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
{
- return __of_genpd_add_provider(np, __of_genpd_xlate_simple, genpd);
+ return ERR_PTR(-ENOTSUPP);
}
-static inline int of_genpd_add_provider_onecell(struct device_node *np,
- struct genpd_onecell_data *data)
-{
- return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data);
-}
+#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
#ifdef CONFIG_PM
extern int dev_pm_domain_attach(struct device *dev, bool power_on);
diff --git a/include/linux/property.h b/include/linux/property.h
index 3a2f9ae..856e50b 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -190,7 +190,7 @@
.length = ARRAY_SIZE(_val_) * sizeof(_type_), \
.is_array = true, \
.is_string = false, \
- { .pointer = { _type_##_data = _val_ } }, \
+ { .pointer = { ._type_##_data = _val_ } }, \
}
#define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index a63a33e..ece7ed9 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -59,6 +59,7 @@
}
extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1aa62e1..321f9ed 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -334,6 +334,7 @@
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
#ifndef CONFIG_TINY_RCU
void rcu_end_inkernel_boot(void);
diff --git a/include/linux/relay.h b/include/linux/relay.h
index d7c8359..ecbb34a 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -19,6 +19,7 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/kref.h>
+#include <linux/percpu.h>
/*
* Tracks changes to rchan/rchan_buf structs
@@ -63,7 +64,7 @@
struct kref kref; /* channel refcount */
void *private_data; /* for user-defined data */
size_t last_toobig; /* tried to log event > subbuf size */
- struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
+ struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */
int is_global; /* One global buffer ? */
struct list_head list; /* for channel list */
struct dentry *parent; /* parent dentry passed to open */
@@ -204,7 +205,7 @@
struct rchan_buf *buf;
local_irq_save(flags);
- buf = chan->buf[smp_processor_id()];
+ buf = *this_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > chan->subbuf_size))
length = relay_switch_subbuf(buf, length);
memcpy(buf->data + buf->offset, data, length);
@@ -230,12 +231,12 @@
{
struct rchan_buf *buf;
- buf = chan->buf[get_cpu()];
+ buf = *get_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > buf->chan->subbuf_size))
length = relay_switch_subbuf(buf, length);
memcpy(buf->data + buf->offset, data, length);
buf->offset += length;
- put_cpu();
+ put_cpu_ptr(chan->buf);
}
/**
@@ -251,17 +252,19 @@
*/
static inline void *relay_reserve(struct rchan *chan, size_t length)
{
- void *reserved;
- struct rchan_buf *buf = chan->buf[smp_processor_id()];
+ void *reserved = NULL;
+ struct rchan_buf *buf = *get_cpu_ptr(chan->buf);
if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
length = relay_switch_subbuf(buf, length);
if (!length)
- return NULL;
+ goto end;
}
reserved = buf->data + buf->offset;
buf->offset += length;
+end:
+ put_cpu_ptr(chan->buf);
return reserved;
}
@@ -285,5 +288,11 @@
*/
extern const struct file_operations relay_file_operations;
+#ifdef CONFIG_RELAY
+int relay_prepare_cpu(unsigned int cpu);
+#else
+#define relay_prepare_cpu NULL
+#endif
+
#endif /* _LINUX_RELAY_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e5..7543a47 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}
+void __noreturn do_task_dead(void);
+
struct nsproxy;
struct user_namespace;
@@ -1022,7 +1024,8 @@
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */
+#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
@@ -1064,6 +1067,12 @@
struct sched_group;
+struct sched_domain_shared {
+ atomic_t ref;
+ atomic_t nr_busy_cpus;
+ int has_idle_cores;
+};
+
struct sched_domain {
/* These fields must be setup */
struct sched_domain *parent; /* top domain must be null terminated */
@@ -1094,6 +1103,8 @@
u64 max_newidle_lb_cost;
unsigned long next_decay_max_lb_cost;
+ u64 avg_scan_cost; /* select_idle_sibling */
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1132,6 +1143,7 @@
void *private; /* used during construction */
struct rcu_head rcu; /* used during destruction */
};
+ struct sched_domain_shared *shared;
unsigned int span_weight;
/*
@@ -1165,6 +1177,7 @@
struct sd_data {
struct sched_domain **__percpu sd;
+ struct sched_domain_shared **__percpu sds;
struct sched_group **__percpu sg;
struct sched_group_capacity **__percpu sgc;
};
@@ -1458,6 +1471,13 @@
};
struct task_struct {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ /*
+ * For reasons of header soup (see current_thread_info()), this
+ * must be the first element of task_struct.
+ */
+ struct thread_info thread_info;
+#endif
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
atomic_t usage;
@@ -1467,6 +1487,9 @@
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ unsigned int cpu; /* current CPU */
+#endif
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
struct task_struct *last_wakee;
@@ -1923,6 +1946,13 @@
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
+#ifdef CONFIG_VMAP_STACK
+ struct vm_struct *stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ /* A live task holds one reference. */
+ atomic_t stack_refcount;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
@@ -1939,6 +1969,18 @@
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return NULL;
+}
+#endif
+
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
@@ -2568,12 +2610,14 @@
return p->pid == 0;
}
extern struct task_struct *curr_task(int cpu);
-extern void set_curr_task(int cpu, struct task_struct *p);
+extern void ia64_set_curr_task(int cpu, struct task_struct *p);
void yield(void);
union thread_union {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
struct thread_info thread_info;
+#endif
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
@@ -3061,10 +3105,34 @@
cgroup_threadgroup_change_end(tsk);
}
-#ifndef __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+static inline struct thread_info *task_thread_info(struct task_struct *task)
+{
+ return &task->thread_info;
+}
+
+/*
+ * When accessing the stack of a non-current task that might exit, use
+ * try_get_task_stack() instead. task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+static inline void *task_stack_page(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#define setup_thread_stack(new,old) do { } while(0)
+
+static inline unsigned long *end_of_stack(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
#define task_thread_info(task) ((struct thread_info *)(task)->stack)
-#define task_stack_page(task) ((task)->stack)
+#define task_stack_page(task) ((void *)(task)->stack)
static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
@@ -3091,6 +3159,24 @@
}
#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return atomic_inc_not_zero(&tsk->stack_refcount) ?
+ task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return task_stack_page(tsk);
+}
+
+static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
@@ -3206,7 +3292,11 @@
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
+#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
#define cond_resched() ({ \
___might_sleep(__FILE__, __LINE__, 0); \
@@ -3236,6 +3326,15 @@
#endif
}
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+ return p->preempt_disable_ip;
+#else
+ return 0;
+#endif
+}
+
/*
* Does a critical section need to be broken due to another
* task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@ -3364,7 +3463,11 @@
static inline unsigned int task_cpu(const struct task_struct *p)
{
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ return p->cpu;
+#else
return task_thread_info(p)->cpu;
+#endif
}
static inline int task_node(const struct task_struct *p)
@@ -3469,15 +3572,20 @@
return task_rlimit_max(current, limit);
}
+#define SCHED_CPUFREQ_RT (1U << 0)
+#define SCHED_CPUFREQ_DL (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT (1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
- void (*func)(struct update_util_data *data,
- u64 time, unsigned long util, unsigned long max);
+ void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
- void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max));
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 2f44e20..3442014 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -367,14 +367,21 @@
#define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn)
-extern int setup_earlycon(char *buf);
extern int of_setup_earlycon(const struct earlycon_id *match,
unsigned long node,
const char *options);
+#ifdef CONFIG_SERIAL_EARLYCON
+extern bool earlycon_init_is_deferred __initdata;
+int setup_earlycon(char *buf);
+#else
+static const bool earlycon_init_is_deferred;
+static inline int setup_earlycon(char *buf) { return 0; }
+#endif
+
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
struct console *c);
-int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr,
+int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
char **options);
void uart_parse_options(char *options, int *baud, int *parity, int *bits,
int *flow);
@@ -412,7 +419,7 @@
static inline int uart_tx_stopped(struct uart_port *port)
{
struct tty_struct *tty = port->state->port.tty;
- if (tty->stopped || port->hw_stopped)
+ if ((tty && tty->stopped) || port->hw_stopped)
return 1;
return 0;
}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4293808..084b12b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -650,4 +650,12 @@
unsigned int kmem_cache_size(struct kmem_cache *s);
void __init kmem_cache_init_late(void);
+#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
+int slab_prepare_cpu(unsigned int cpu);
+int slab_dead_cpu(unsigned int cpu);
+#else
+#define slab_prepare_cpu NULL
+#define slab_dead_cpu NULL
+#endif
+
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index eccae469..8e0cb7a 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -196,6 +196,9 @@
void smp_setup_processor_id(void);
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
+ bool phys);
+
/* SMP core functions */
int smpcfd_prepare_cpu(unsigned int cpu);
int smpcfd_dead_cpu(unsigned int cpu);
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 7693e39..d971837 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -245,6 +245,7 @@
return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER);
}
+extern void __init pm_states_init(void);
extern void freeze_set_ops(const struct platform_freeze_ops *ops);
extern void freeze_wake(void);
@@ -279,6 +280,7 @@
static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
static inline bool idle_should_freeze(void) { return false; }
+static inline void __init pm_states_init(void) {}
static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {}
static inline void freeze_wake(void) {}
#endif /* !CONFIG_SUSPEND */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b17cc48..4a529c9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -257,6 +257,7 @@
static inline void workingset_node_pages_dec(struct radix_tree_node *node)
{
+ VM_BUG_ON(!workingset_node_pages(node));
node->count--;
}
@@ -272,6 +273,7 @@
static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
{
+ VM_BUG_ON(!workingset_node_shadows(node));
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
}
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2b5b10e..45f004e 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -13,6 +13,21 @@
struct timespec;
struct compat_timespec;
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+struct thread_info {
+ unsigned long flags; /* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .flags = 0, \
+}
+#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+#define current_thread_info() ((struct thread_info *)current)
+#endif
+
/*
* System call restart block.
*/
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 7e5d2fa..980c71b 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -5,6 +5,7 @@
#include <linux/math64.h>
typedef __s64 time64_t;
+typedef __u64 timeu64_t;
/*
* This wants to go into uapi/linux/time.h once we agreed about the
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 816b754..09168c5 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_TIMEKEEPING_H
#define _LINUX_TIMEKEEPING_H
-#include <asm-generic/errno-base.h>
+#include <linux/errno.h>
/* Included from linux/ktime.h */
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 6685a73..a45702e 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -43,7 +43,7 @@
#define TORTURE_FLAG "-torture:"
#define TOROUT_STRING(s) \
- pr_alert("%s" TORTURE_FLAG s "\n", torture_type)
+ pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s)
#define VERBOSE_TOROUT_STRING(s) \
do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0)
#define VERBOSE_TOROUT_ERRSTRING(s) \
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index d3a2bb7..650f3dd 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -103,29 +103,40 @@
#endif
}
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
return read_seqcount_begin(&syncp->seq);
#else
-#if BITS_PER_LONG==32
+ return 0;
+#endif
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
preempt_disable();
#endif
- return 0;
+ return __u64_stats_fetch_begin(syncp);
+}
+
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ return read_seqcount_retry(&syncp->seq, start);
+#else
+ return false;
#endif
}
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
preempt_enable();
#endif
- return false;
-#endif
+ return __u64_stats_fetch_retry(syncp, start);
}
/*
@@ -136,27 +147,19 @@
*/
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_begin(&syncp->seq);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
local_irq_disable();
#endif
- return 0;
-#endif
+ return __u64_stats_fetch_begin(syncp);
}
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
- unsigned int start)
+ unsigned int start)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
local_irq_enable();
#endif
- return false;
-#endif
+ return __u64_stats_fetch_retry(syncp, start);
}
#endif /* _LINUX_U64_STATS_SYNC_H */
diff --git a/include/linux/vme.h b/include/linux/vme.h
index 71e4a6d..ea6095d 100644
--- a/include/linux/vme.h
+++ b/include/linux/vme.h
@@ -166,7 +166,7 @@
int vme_lm_count(struct vme_resource *);
int vme_lm_set(struct vme_resource *, unsigned long long, u32, u32);
int vme_lm_get(struct vme_resource *, unsigned long long *, u32 *, u32 *);
-int vme_lm_attach(struct vme_resource *, int, void (*callback)(int));
+int vme_lm_attach(struct vme_resource *, int, void (*callback)(void *), void *);
int vme_lm_detach(struct vme_resource *, int);
void vme_lm_free(struct vme_resource *);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3ff74d..2408e8d5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -248,6 +248,8 @@
(!__builtin_constant_p(state) || \
state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \
+extern void init_wait_entry(wait_queue_t *__wait, int flags);
+
/*
* The below macro ___wait_event() has an explicit shadow of the __ret
* variable when used from the wait_event_*() macros.
@@ -266,12 +268,7 @@
wait_queue_t __wait; \
long __ret = ret; /* explicit shadow */ \
\
- INIT_LIST_HEAD(&__wait.task_list); \
- if (exclusive) \
- __wait.flags = WQ_FLAG_EXCLUSIVE; \
- else \
- __wait.flags = 0; \
- \
+ init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \
for (;;) { \
long __int = prepare_to_wait_event(&wq, &__wait, state);\
\
@@ -280,12 +277,7 @@
\
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
- if (exclusive) { \
- abort_exclusive_wait(&wq, &__wait, \
- state, NULL); \
- goto __out; \
- } \
- break; \
+ goto __out; \
} \
\
cmd; \
@@ -989,7 +981,6 @@
void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
diff --git a/include/media/cec.h b/include/media/cec.h
index dc7854b..fdb5d60 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -57,8 +57,8 @@
int minor;
bool registered;
bool unregistered;
- struct mutex fhs_lock;
struct list_head fhs;
+ struct mutex lock;
};
struct cec_adapter;
diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 6793614..e693731 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -27,6 +27,20 @@
#endif
}
+static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
+ const struct nf_conn *tmpl)
+{
+ if (tmpl && nfct_synproxy(tmpl)) {
+ if (!nfct_seqadj_ext_add(ct))
+ return false;
+
+ if (!nfct_synproxy_ext_add(ct))
+ return false;
+ }
+
+ return true;
+}
+
struct synproxy_stats {
unsigned int syn_received;
unsigned int cookie_invalid;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index efc0174..bafe2a0 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -382,7 +382,7 @@
ADDIP_SERIAL_SIGN_BIT = (1<<31)
};
-static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t)
+static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t)
{
return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT);
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ce93c4b..ced0df3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -554,6 +554,9 @@
atomic_t refcnt;
+ /* How many times this chunk have been sent, for prsctp RTX policy */
+ int sent_count;
+
/* This is our link to the per-transport transmitted list. */
struct list_head transmitted_list;
@@ -603,16 +606,6 @@
/* This needs to be recoverable for SCTP_SEND_FAILED events. */
struct sctp_sndrcvinfo sinfo;
- /* We use this field to record param for prsctp policies,
- * for TTL policy, it is the time_to_drop of this chunk,
- * for RTX policy, it is the max_sent_count of this chunk,
- * for PRIO policy, it is the priority of this chunk.
- */
- unsigned long prsctp_param;
-
- /* How many times this chunk have been sent, for prsctp RTX policy */
- int sent_count;
-
/* Which association does this belong to? */
struct sctp_association *asoc;
diff --git a/include/net/sock.h b/include/net/sock.h
index ff5be7e..8741988 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1332,6 +1332,16 @@
if (!sk_has_account(sk))
return;
sk->sk_forward_alloc += size;
+
+ /* Avoid a possible overflow.
+ * TCP send queues can make this happen, if sk_mem_reclaim()
+ * is not called and more than 2 GBytes are released at once.
+ *
+ * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
+ * no need to hold that much forward allocation anyway.
+ */
+ if (unlikely(sk->sk_forward_alloc >= 1 << 21))
+ __sk_mem_reclaim(sk, 1 << 20);
}
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index adfebd6..1793431 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1540,8 +1540,10 @@
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_extract_header(struct sk_buff *skb);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+ struct ip6_tnl *t);
int xfrm6_transport_finish(struct sk_buff *skb, int async);
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
int xfrm6_rcv(struct sk_buff *skb);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 0dee7af..7e4cd53 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -771,12 +771,9 @@
shost->tmf_in_progress;
}
-extern bool scsi_use_blk_mq;
-
static inline bool shost_use_blk_mq(struct Scsi_Host *shost)
{
- return scsi_use_blk_mq;
-
+ return shost->use_blk_mq;
}
extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
index a72bd93..996953d 100644
--- a/include/trace/events/cpuhp.h
+++ b/include/trace/events/cpuhp.h
@@ -33,6 +33,34 @@
__entry->cpu, __entry->target, __entry->idx, __entry->fun)
);
+TRACE_EVENT(cpuhp_multi_enter,
+
+ TP_PROTO(unsigned int cpu,
+ int target,
+ int idx,
+ int (*fun)(unsigned int, struct hlist_node *),
+ struct hlist_node *node),
+
+ TP_ARGS(cpu, target, idx, fun, node),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, cpu )
+ __field( int, target )
+ __field( int, idx )
+ __field( void *, fun )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->target = target;
+ __entry->idx = idx;
+ __entry->fun = fun;
+ ),
+
+ TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+ __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
TRACE_EVENT(cpuhp_exit,
TP_PROTO(unsigned int cpu,
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef..70f0214 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,8 @@
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
+ __field( u64, synd )
+ __field( u64, ipid )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -38,6 +40,8 @@
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
+ __entry->synd = m->synd;
+ __entry->ipid = m->ipid;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -50,11 +54,12 @@
__entry->cpuvendor = m->cpuvendor;
),
- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
- __entry->addr, __entry->misc,
+ __entry->ipid,
+ __entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
__entry->cpuvendor, __entry->cpuid,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 19e5030..54e3aad 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -69,7 +69,8 @@
u64 mperf,
u64 aperf,
u64 tsc,
- u32 freq
+ u32 freq,
+ u32 io_boost
),
TP_ARGS(core_busy,
@@ -79,7 +80,8 @@
mperf,
aperf,
tsc,
- freq
+ freq,
+ io_boost
),
TP_STRUCT__entry(
@@ -91,6 +93,7 @@
__field(u64, aperf)
__field(u64, tsc)
__field(u32, freq)
+ __field(u32, io_boost)
),
TP_fast_assign(
@@ -102,9 +105,10 @@
__entry->aperf = aperf;
__entry->tsc = tsc;
__entry->freq = freq;
+ __entry->io_boost = io_boost;
),
- TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ",
+ TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
(unsigned long)__entry->core_busy,
(unsigned long)__entry->scaled_busy,
(unsigned long)__entry->from,
@@ -112,7 +116,8 @@
(unsigned long long)__entry->mperf,
(unsigned long long)__entry->aperf,
(unsigned long long)__entry->tsc,
- (unsigned long)__entry->freq
+ (unsigned long)__entry->freq,
+ (unsigned long)__entry->io_boost
)
);
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 1e5ac4e7..b4c0484 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -376,5 +376,13 @@
#define UART_EXAR_TXTRG 0x0a /* Tx FIFO trigger level write-only */
#define UART_EXAR_RXTRG 0x0b /* Rx FIFO trigger level write-only */
+/*
+ * These are definitions for the Altera ALTR_16550_F32/F64/F128
+ * Normalized from 0x100 to 0x40 because of shift by 2 (32 bit regs).
+ */
+#define UART_ALTR_AFR 0x40 /* Additional Features Register */
+#define UART_ALTR_EN_TXFIFO_LW 0x01 /* Enable the TX FIFO Low Watermark */
+#define UART_ALTR_TX_LOW 0x41 /* Tx FIFO Low Watermark */
+
#endif /* _LINUX_SERIAL_REG_H */
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index f184909..a4c4d73 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -3,6 +3,24 @@
*
* Scheduler state interactions
*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
* Copyright (c) 2005, Keir Fraser <keir@xensource.com>
*/
@@ -12,18 +30,30 @@
#include <xen/interface/event_channel.h>
/*
+ * Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
* The prototype for this hypercall is:
- * long sched_op_new(int cmd, void *arg)
+ * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
* @cmd == SCHEDOP_??? (scheduler operation).
* @arg == Operation-specific extra argument(s), as described below.
+ * ... == Additional Operation-specific extra arguments, described below.
*
- * **NOTE**:
- * Versions of Xen prior to 3.0.2 provide only the following legacy version
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
* of this hypercall, supporting only the commands yield, block and shutdown:
* long sched_op(int cmd, unsigned long arg)
* @cmd == SCHEDOP_??? (scheduler operation).
* @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
* == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
*/
/*
@@ -44,12 +74,17 @@
/*
* Halt execution of this domain (all VCPUs) and notify the system controller.
* @arg == pointer to sched_shutdown structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page. RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, which reason is SHUTDOWN_suspend this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
*/
#define SCHEDOP_shutdown 2
-struct sched_shutdown {
- unsigned int reason; /* SHUTDOWN_* */
-};
-DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
/*
* Poll a set of event-channel ports. Return when one or more are pending. An
@@ -57,12 +92,6 @@
* @arg == pointer to sched_poll structure.
*/
#define SCHEDOP_poll 3
-struct sched_poll {
- GUEST_HANDLE(evtchn_port_t) ports;
- unsigned int nr_ports;
- uint64_t timeout;
-};
-DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
/*
* Declare a shutdown for another domain. The main use of this function is
@@ -71,15 +100,11 @@
* @arg == pointer to sched_remote_shutdown structure.
*/
#define SCHEDOP_remote_shutdown 4
-struct sched_remote_shutdown {
- domid_t domain_id; /* Remote domain ID */
- unsigned int reason; /* SHUTDOWN_xxx reason */
-};
/*
* Latch a shutdown code, so that when the domain later shuts down it
* reports this code to the control tools.
- * @arg == as for SCHEDOP_shutdown.
+ * @arg == sched_shutdown, as for SCHEDOP_shutdown.
*/
#define SCHEDOP_shutdown_code 5
@@ -92,10 +117,47 @@
* With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
*/
#define SCHEDOP_watchdog 6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu or
+ * undo this override restoring the previous affinity.
+ * @arg == pointer to sched_pin_override structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+
+struct sched_shutdown {
+ unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
+
+struct sched_poll {
+ GUEST_HANDLE(evtchn_port_t) ports;
+ unsigned int nr_ports;
+ uint64_t timeout;
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
+
+struct sched_remote_shutdown {
+ domid_t domain_id; /* Remote domain ID */
+ unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
+
struct sched_watchdog {
uint32_t id; /* watchdog ID */
uint32_t timeout; /* timeout */
};
+DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
+
+struct sched_pin_override {
+ int32_t pcpu;
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
/*
* Reason codes for SCHEDOP_shutdown. These may be interpreted by control
@@ -107,6 +169,7 @@
#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
+
/*
* Domain asked to perform 'soft reset' for it. The expected behavior is to
* reset internal Xen state for the domain returning it to the point where it
@@ -115,5 +178,6 @@
* interfaces again.
*/
#define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */
#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/init/Kconfig b/init/Kconfig
index cac3f09..3b9a47f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -26,6 +26,16 @@
config BUILDTIME_EXTABLE_SORT
bool
+config THREAD_INFO_IN_TASK
+ bool
+ help
+ Select this to move thread_info off the stack into task_struct. To
+ make this work, an arch will need to remove all thread_info fields
+ except flags and fix any runtime bugs.
+
+ One subtle change that will be needed is to use try_get_task_stack()
+ and put_task_stack() in save_thread_stack_tsk() and get_wchan().
+
menu "General setup"
config BROKEN
diff --git a/init/init_task.c b/init/init_task.c
index ba0a7f36..11f83be1 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,5 +22,8 @@
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
*/
-union thread_union init_thread_union __init_task_data =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data = {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+ INIT_THREAD_INFO(init_task)
+#endif
+};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7..9ba28310 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3446,9 +3446,28 @@
* Except for the root, subtree_control must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
- if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
- ret = -EBUSY;
- goto out_unlock;
+ if (enable && cgroup_parent(cgrp)) {
+ struct cgrp_cset_link *link;
+
+ /*
+ * Because namespaces pin csets too, @cgrp->cset_links
+ * might not be empty even when @cgrp is empty. Walk and
+ * verify each cset.
+ */
+ spin_lock_irq(&css_set_lock);
+
+ ret = 0;
+ list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+ if (css_set_populated(link->cset)) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ spin_unlock_irq(&css_set_lock);
+
+ if (ret)
+ goto out_unlock;
}
/* save and update control masks and prepare csses */
@@ -3899,7 +3918,9 @@
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
- * Return the number of tasks in the cgroup.
+ * Return the number of tasks in the cgroup. The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
*/
static int cgroup_task_count(const struct cgroup *cgrp)
{
@@ -5606,6 +5627,12 @@
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
+ /*
+ * The latency of the synchronize_sched() is too high for cgroups,
+ * avoid it at the cost of forcing all readers into the slow path.
+ */
+ rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
get_user_ns(init_cgroup_ns.user_ns);
mutex_lock(&cgroup_mutex);
@@ -6270,6 +6297,12 @@
if (cgroup_sk_alloc_disabled)
return;
+ /* Socket clone path */
+ if (skcd->val) {
+ cgroup_get(sock_cgroup_ptr(skcd));
+ return;
+ }
+
rcu_read_lock();
while (true) {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 341bf80..5df20d6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,8 @@
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/smpboot.h>
+#include <linux/relay.h>
+#include <linux/slab.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -37,8 +39,9 @@
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
* @rollback: Perform a rollback
- * @cb_stat: The state for a single callback (install/uninstall)
- * @cb: Single callback function (install/uninstall)
+ * @single: Single callback invocation
+ * @bringup: Single callback bringup or teardown selector
+ * @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
* @done: Signal completion to the issuer of the task
*/
@@ -49,8 +52,10 @@
struct task_struct *thread;
bool should_run;
bool rollback;
+ bool single;
+ bool bringup;
+ struct hlist_node *node;
enum cpuhp_state cb_state;
- int (*cb)(unsigned int cpu);
int result;
struct completion done;
#endif
@@ -68,35 +73,103 @@
* @cant_stop: Bringup/teardown can't be stopped at this step
*/
struct cpuhp_step {
- const char *name;
- int (*startup)(unsigned int cpu);
- int (*teardown)(unsigned int cpu);
- bool skip_onerr;
- bool cant_stop;
+ const char *name;
+ union {
+ int (*single)(unsigned int cpu);
+ int (*multi)(unsigned int cpu,
+ struct hlist_node *node);
+ } startup;
+ union {
+ int (*single)(unsigned int cpu);
+ int (*multi)(unsigned int cpu,
+ struct hlist_node *node);
+ } teardown;
+ struct hlist_head list;
+ bool skip_onerr;
+ bool cant_stop;
+ bool multi_instance;
};
static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_bp_states[];
static struct cpuhp_step cpuhp_ap_states[];
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+ /*
+ * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+ * purposes as that state is handled explicitly in cpu_down.
+ */
+ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+ struct cpuhp_step *sp;
+
+ sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+ return sp + state;
+}
+
/**
* cpuhp_invoke_callback _ Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @step: The step in the state machine
- * @cb: The callback function to invoke
+ * @bringup: True if the bringup callback should be invoked
*
- * Called from cpu hotplug and from the state register machinery
+ * Called from cpu hotplug and from the state register machinery.
*/
-static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
- int (*cb)(unsigned int))
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
+ bool bringup, struct hlist_node *node)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
- int ret = 0;
+ struct cpuhp_step *step = cpuhp_get_step(state);
+ int (*cbm)(unsigned int cpu, struct hlist_node *node);
+ int (*cb)(unsigned int cpu);
+ int ret, cnt;
- if (cb) {
- trace_cpuhp_enter(cpu, st->target, step, cb);
+ if (!step->multi_instance) {
+ cb = bringup ? step->startup.single : step->teardown.single;
+ if (!cb)
+ return 0;
+ trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu);
- trace_cpuhp_exit(cpu, st->state, step, ret);
+ trace_cpuhp_exit(cpu, st->state, state, ret);
+ return ret;
+ }
+ cbm = bringup ? step->startup.multi : step->teardown.multi;
+ if (!cbm)
+ return 0;
+
+ /* Single invocation for instance add/remove */
+ if (node) {
+ trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+ ret = cbm(cpu, node);
+ trace_cpuhp_exit(cpu, st->state, state, ret);
+ return ret;
+ }
+
+ /* State transition. Invoke on all instances */
+ cnt = 0;
+ hlist_for_each(node, &step->list) {
+ trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+ ret = cbm(cpu, node);
+ trace_cpuhp_exit(cpu, st->state, state, ret);
+ if (ret)
+ goto err;
+ cnt++;
+ }
+ return 0;
+err:
+ /* Rollback the instances if one failed */
+ cbm = !bringup ? step->startup.multi : step->teardown.multi;
+ if (!cbm)
+ return ret;
+
+ hlist_for_each(node, &step->list) {
+ if (!cnt--)
+ break;
+ cbm(cpu, node);
}
return ret;
}
@@ -260,10 +333,17 @@
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
+static void __cpu_hotplug_enable(void)
+{
+ if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+ return;
+ cpu_hotplug_disabled--;
+}
+
void cpu_hotplug_enable(void)
{
cpu_maps_update_begin();
- WARN_ON(--cpu_hotplug_disabled < 0);
+ __cpu_hotplug_enable();
cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@ -330,12 +410,6 @@
return 0;
}
-static int notify_starting(unsigned int cpu)
-{
- cpu_notify(CPU_STARTING, cpu);
- return 0;
-}
-
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -349,8 +423,16 @@
struct task_struct *idle = idle_thread_get(cpu);
int ret;
+ /*
+ * Some architectures have to walk the irq descriptors to
+ * setup the vector space for the cpu which comes online.
+ * Prevent irq alloc/free across the bringup.
+ */
+ irq_lock_sparse();
+
/* Arch-specific enabling code. */
ret = __cpu_up(cpu, idle);
+ irq_unlock_sparse();
if (ret) {
cpu_notify(CPU_UP_CANCELED, cpu);
return ret;
@@ -363,62 +445,55 @@
/*
* Hotplug state machine related functions
*/
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
- struct cpuhp_step *steps)
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
for (st->state++; st->state < st->target; st->state++) {
- struct cpuhp_step *step = steps + st->state;
+ struct cpuhp_step *step = cpuhp_get_step(st->state);
if (!step->skip_onerr)
- cpuhp_invoke_callback(cpu, st->state, step->startup);
+ cpuhp_invoke_callback(cpu, st->state, true, NULL);
}
}
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
- struct cpuhp_step *steps, enum cpuhp_state target)
+ enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
int ret = 0;
for (; st->state > target; st->state--) {
- struct cpuhp_step *step = steps + st->state;
-
- ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
if (ret) {
st->target = prev_state;
- undo_cpu_down(cpu, st, steps);
+ undo_cpu_down(cpu, st);
break;
}
}
return ret;
}
-static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
- struct cpuhp_step *steps)
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
for (st->state--; st->state > st->target; st->state--) {
- struct cpuhp_step *step = steps + st->state;
+ struct cpuhp_step *step = cpuhp_get_step(st->state);
if (!step->skip_onerr)
- cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ cpuhp_invoke_callback(cpu, st->state, false, NULL);
}
}
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
- struct cpuhp_step *steps, enum cpuhp_state target)
+ enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
int ret = 0;
while (st->state < target) {
- struct cpuhp_step *step;
-
st->state++;
- step = steps + st->state;
- ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+ ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
if (ret) {
st->target = prev_state;
- undo_cpu_up(cpu, st, steps);
+ undo_cpu_up(cpu, st);
break;
}
}
@@ -447,13 +522,13 @@
{
enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
- return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+ return cpuhp_down_callbacks(cpu, st, target);
}
/* Execute the online startup callbacks. Used to be CPU_ONLINE */
static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
{
- return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+ return cpuhp_up_callbacks(cpu, st, st->target);
}
/*
@@ -476,18 +551,20 @@
st->should_run = false;
/* Single callback invocation for [un]install ? */
- if (st->cb) {
+ if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
local_irq_disable();
- ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ ret = cpuhp_invoke_callback(cpu, st->cb_state,
+ st->bringup, st->node);
local_irq_enable();
} else {
- ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ ret = cpuhp_invoke_callback(cpu, st->cb_state,
+ st->bringup, st->node);
}
} else if (st->rollback) {
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
- undo_cpu_down(cpu, st, cpuhp_ap_states);
+ undo_cpu_down(cpu, st);
/*
* This is a momentary workaround to keep the notifier users
* happy. Will go away once we got rid of the notifiers.
@@ -509,8 +586,9 @@
}
/* Invoke a single callback on a remote cpu */
-static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
- int (*cb)(unsigned int))
+static int
+cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
+ struct hlist_node *node)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -522,10 +600,13 @@
* we invoke the thread function directly.
*/
if (!st->thread)
- return cpuhp_invoke_callback(cpu, state, cb);
+ return cpuhp_invoke_callback(cpu, state, bringup, node);
st->cb_state = state;
- st->cb = cb;
+ st->single = true;
+ st->bringup = bringup;
+ st->node = node;
+
/*
* Make sure the above stores are visible before should_run becomes
* true. Paired with the mb() above in cpuhp_thread_fun()
@@ -541,7 +622,7 @@
static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
{
st->result = 0;
- st->cb = NULL;
+ st->single = false;
/*
* Make sure the above stores are visible before should_run becomes
* true. Paired with the mb() above in cpuhp_thread_fun()
@@ -674,12 +755,6 @@
return err;
}
-static int notify_dying(unsigned int cpu)
-{
- cpu_notify(CPU_DYING, cpu);
- return 0;
-}
-
/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
@@ -692,12 +767,16 @@
if (err < 0)
return err;
+ /*
+ * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
+ * do this step again.
+ */
+ WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
+ st->state--;
/* Invoke the former CPU_DYING callbacks */
- for (; st->state > target; st->state--) {
- struct cpuhp_step *step = cpuhp_ap_states + st->state;
+ for (; st->state > target; st->state--)
+ cpuhp_invoke_callback(cpu, st->state, false, NULL);
- cpuhp_invoke_callback(cpu, st->state, step->teardown);
- }
/* Give up timekeeping duties */
tick_handover_do_timer();
/* Park the stopper thread */
@@ -734,7 +813,7 @@
BUG_ON(cpu_online(cpu));
/*
- * The migration_call() CPU_DYING callback will have removed all
+ * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
* runnable tasks from the cpu, there's only the idle task left now
* that the migration thread is done doing the stop_machine thing.
*
@@ -787,7 +866,6 @@
#define notify_down_prepare NULL
#define takedown_cpu NULL
#define notify_dead NULL
-#define notify_dying NULL
#endif
#ifdef CONFIG_HOTPLUG_CPU
@@ -836,7 +914,7 @@
* The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
* to do the further cleanups.
*/
- ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ ret = cpuhp_down_callbacks(cpu, st, target);
if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
st->target = prev_state;
st->rollback = true;
@@ -877,10 +955,9 @@
#endif /*CONFIG_HOTPLUG_CPU*/
/**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
* @cpu: cpu that just started
*
- * This function calls the cpu_chain notifiers with CPU_STARTING.
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
@@ -889,12 +966,10 @@
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+ rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
while (st->state < target) {
- struct cpuhp_step *step;
-
st->state++;
- step = cpuhp_ap_states + st->state;
- cpuhp_invoke_callback(cpu, st->state, step->startup);
+ cpuhp_invoke_callback(cpu, st->state, true, NULL);
}
}
@@ -979,7 +1054,7 @@
* responsible for bringing it up to the target state.
*/
target = min((int)target, CPUHP_BRINGUP_CPU);
- ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
+ ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpu_hotplug_done();
return ret;
@@ -1024,12 +1099,13 @@
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
-int disable_nonboot_cpus(void)
+int freeze_secondary_cpus(int primary)
{
- int cpu, first_cpu, error = 0;
+ int cpu, error = 0;
cpu_maps_update_begin();
- first_cpu = cpumask_first(cpu_online_mask);
+ if (!cpu_online(primary))
+ primary = cpumask_first(cpu_online_mask);
/*
* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
@@ -1038,7 +1114,7 @@
pr_info("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
- if (cpu == first_cpu)
+ if (cpu == primary)
continue;
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
@@ -1081,7 +1157,7 @@
/* Allow everyone to use the CPU hotplug again */
cpu_maps_update_begin();
- WARN_ON(--cpu_hotplug_disabled < 0);
+ __cpu_hotplug_enable();
if (cpumask_empty(frozen_cpus))
goto out;
@@ -1170,40 +1246,50 @@
static struct cpuhp_step cpuhp_bp_states[] = {
[CPUHP_OFFLINE] = {
.name = "offline",
- .startup = NULL,
- .teardown = NULL,
+ .startup.single = NULL,
+ .teardown.single = NULL,
},
#ifdef CONFIG_SMP
[CPUHP_CREATE_THREADS]= {
- .name = "threads:create",
- .startup = smpboot_create_threads,
- .teardown = NULL,
+ .name = "threads:prepare",
+ .startup.single = smpboot_create_threads,
+ .teardown.single = NULL,
.cant_stop = true,
},
[CPUHP_PERF_PREPARE] = {
- .name = "perf prepare",
- .startup = perf_event_init_cpu,
- .teardown = perf_event_exit_cpu,
+ .name = "perf:prepare",
+ .startup.single = perf_event_init_cpu,
+ .teardown.single = perf_event_exit_cpu,
},
[CPUHP_WORKQUEUE_PREP] = {
- .name = "workqueue prepare",
- .startup = workqueue_prepare_cpu,
- .teardown = NULL,
+ .name = "workqueue:prepare",
+ .startup.single = workqueue_prepare_cpu,
+ .teardown.single = NULL,
},
[CPUHP_HRTIMERS_PREPARE] = {
- .name = "hrtimers prepare",
- .startup = hrtimers_prepare_cpu,
- .teardown = hrtimers_dead_cpu,
+ .name = "hrtimers:prepare",
+ .startup.single = hrtimers_prepare_cpu,
+ .teardown.single = hrtimers_dead_cpu,
},
[CPUHP_SMPCFD_PREPARE] = {
- .name = "SMPCFD prepare",
- .startup = smpcfd_prepare_cpu,
- .teardown = smpcfd_dead_cpu,
+ .name = "smpcfd:prepare",
+ .startup.single = smpcfd_prepare_cpu,
+ .teardown.single = smpcfd_dead_cpu,
+ },
+ [CPUHP_RELAY_PREPARE] = {
+ .name = "relay:prepare",
+ .startup.single = relay_prepare_cpu,
+ .teardown.single = NULL,
+ },
+ [CPUHP_SLAB_PREPARE] = {
+ .name = "slab:prepare",
+ .startup.single = slab_prepare_cpu,
+ .teardown.single = slab_dead_cpu,
},
[CPUHP_RCUTREE_PREP] = {
- .name = "RCU-tree prepare",
- .startup = rcutree_prepare_cpu,
- .teardown = rcutree_dead_cpu,
+ .name = "RCU/tree:prepare",
+ .startup.single = rcutree_prepare_cpu,
+ .teardown.single = rcutree_dead_cpu,
},
/*
* Preparatory and dead notifiers. Will be replaced once the notifiers
@@ -1211,8 +1297,8 @@
*/
[CPUHP_NOTIFY_PREPARE] = {
.name = "notify:prepare",
- .startup = notify_prepare,
- .teardown = notify_dead,
+ .startup.single = notify_prepare,
+ .teardown.single = notify_dead,
.skip_onerr = true,
.cant_stop = true,
},
@@ -1222,20 +1308,21 @@
* otherwise a RCU stall occurs.
*/
[CPUHP_TIMERS_DEAD] = {
- .name = "timers dead",
- .startup = NULL,
- .teardown = timers_dead_cpu,
+ .name = "timers:dead",
+ .startup.single = NULL,
+ .teardown.single = timers_dead_cpu,
},
/* Kicks the plugged cpu into life */
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
- .startup = bringup_cpu,
- .teardown = NULL,
+ .startup.single = bringup_cpu,
+ .teardown.single = NULL,
.cant_stop = true,
},
[CPUHP_AP_SMPCFD_DYING] = {
- .startup = NULL,
- .teardown = smpcfd_dying_cpu,
+ .name = "smpcfd:dying",
+ .startup.single = NULL,
+ .teardown.single = smpcfd_dying_cpu,
},
/*
* Handled on controll processor until the plugged processor manages
@@ -1243,8 +1330,8 @@
*/
[CPUHP_TEARDOWN_CPU] = {
.name = "cpu:teardown",
- .startup = NULL,
- .teardown = takedown_cpu,
+ .startup.single = NULL,
+ .teardown.single = takedown_cpu,
.cant_stop = true,
},
#else
@@ -1270,24 +1357,13 @@
/* First state is scheduler control. Interrupts are disabled */
[CPUHP_AP_SCHED_STARTING] = {
.name = "sched:starting",
- .startup = sched_cpu_starting,
- .teardown = sched_cpu_dying,
+ .startup.single = sched_cpu_starting,
+ .teardown.single = sched_cpu_dying,
},
[CPUHP_AP_RCUTREE_DYING] = {
- .startup = NULL,
- .teardown = rcutree_dying_cpu,
- },
- /*
- * Low level startup/teardown notifiers. Run with interrupts
- * disabled. Will be removed once the notifiers are converted to
- * states.
- */
- [CPUHP_AP_NOTIFY_STARTING] = {
- .name = "notify:starting",
- .startup = notify_starting,
- .teardown = notify_dying,
- .skip_onerr = true,
- .cant_stop = true,
+ .name = "RCU/tree:dying",
+ .startup.single = NULL,
+ .teardown.single = rcutree_dying_cpu,
},
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
@@ -1296,24 +1372,24 @@
},
/* Handle smpboot threads park/unpark */
[CPUHP_AP_SMPBOOT_THREADS] = {
- .name = "smpboot:threads",
- .startup = smpboot_unpark_threads,
- .teardown = NULL,
+ .name = "smpboot/threads:online",
+ .startup.single = smpboot_unpark_threads,
+ .teardown.single = NULL,
},
[CPUHP_AP_PERF_ONLINE] = {
- .name = "perf online",
- .startup = perf_event_init_cpu,
- .teardown = perf_event_exit_cpu,
+ .name = "perf:online",
+ .startup.single = perf_event_init_cpu,
+ .teardown.single = perf_event_exit_cpu,
},
[CPUHP_AP_WORKQUEUE_ONLINE] = {
- .name = "workqueue online",
- .startup = workqueue_online_cpu,
- .teardown = workqueue_offline_cpu,
+ .name = "workqueue:online",
+ .startup.single = workqueue_online_cpu,
+ .teardown.single = workqueue_offline_cpu,
},
[CPUHP_AP_RCUTREE_ONLINE] = {
- .name = "RCU-tree online",
- .startup = rcutree_online_cpu,
- .teardown = rcutree_offline_cpu,
+ .name = "RCU/tree:online",
+ .startup.single = rcutree_online_cpu,
+ .teardown.single = rcutree_offline_cpu,
},
/*
@@ -1322,8 +1398,8 @@
*/
[CPUHP_AP_NOTIFY_ONLINE] = {
.name = "notify:online",
- .startup = notify_online,
- .teardown = notify_down_prepare,
+ .startup.single = notify_online,
+ .teardown.single = notify_down_prepare,
.skip_onerr = true,
},
#endif
@@ -1335,16 +1411,16 @@
/* Last state is scheduler control setting the cpu active */
[CPUHP_AP_ACTIVE] = {
.name = "sched:active",
- .startup = sched_cpu_activate,
- .teardown = sched_cpu_deactivate,
+ .startup.single = sched_cpu_activate,
+ .teardown.single = sched_cpu_deactivate,
},
#endif
/* CPU is fully up and running. */
[CPUHP_ONLINE] = {
.name = "online",
- .startup = NULL,
- .teardown = NULL,
+ .startup.single = NULL,
+ .teardown.single = NULL,
},
};
@@ -1356,54 +1432,42 @@
return 0;
}
-static bool cpuhp_is_ap_state(enum cpuhp_state state)
-{
- /*
- * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
- * purposes as that state is handled explicitely in cpu_down.
- */
- return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
-}
-
-static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
-{
- struct cpuhp_step *sp;
-
- sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
- return sp + state;
-}
-
static void cpuhp_store_callbacks(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu))
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
/* (Un)Install the callbacks for further cpu hotplug operations */
struct cpuhp_step *sp;
mutex_lock(&cpuhp_state_mutex);
sp = cpuhp_get_step(state);
- sp->startup = startup;
- sp->teardown = teardown;
+ sp->startup.single = startup;
+ sp->teardown.single = teardown;
sp->name = name;
+ sp->multi_instance = multi_instance;
+ INIT_HLIST_HEAD(&sp->list);
mutex_unlock(&cpuhp_state_mutex);
}
static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
- return cpuhp_get_step(state)->teardown;
+ return cpuhp_get_step(state)->teardown.single;
}
/*
* Call the startup/teardown function for a step either on the AP or
* on the current CPU.
*/
-static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
- int (*cb)(unsigned int), bool bringup)
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
+ struct hlist_node *node)
{
+ struct cpuhp_step *sp = cpuhp_get_step(state);
int ret;
- if (!cb)
+ if ((bringup && !sp->startup.single) ||
+ (!bringup && !sp->teardown.single))
return 0;
/*
* The non AP bound callbacks can fail on bringup. On teardown
@@ -1411,11 +1475,11 @@
*/
#ifdef CONFIG_SMP
if (cpuhp_is_ap_state(state))
- ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+ ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
else
- ret = cpuhp_invoke_callback(cpu, state, cb);
+ ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#else
- ret = cpuhp_invoke_callback(cpu, state, cb);
+ ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#endif
BUG_ON(ret && !bringup);
return ret;
@@ -1427,13 +1491,10 @@
* Note: The teardown callbacks for rollback are not allowed to fail!
*/
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
- int (*teardown)(unsigned int cpu))
+ struct hlist_node *node)
{
int cpu;
- if (!teardown)
- return;
-
/* Roll back the already executed steps on the other cpus */
for_each_present_cpu(cpu) {
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -1444,7 +1505,7 @@
/* Did we invoke the startup call on that cpu ? */
if (cpustate >= state)
- cpuhp_issue_call(cpu, state, teardown, false);
+ cpuhp_issue_call(cpu, state, false, node);
}
}
@@ -1471,6 +1532,52 @@
return -ENOSPC;
}
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke)
+{
+ struct cpuhp_step *sp;
+ int cpu;
+ int ret;
+
+ sp = cpuhp_get_step(state);
+ if (sp->multi_instance == false)
+ return -EINVAL;
+
+ get_online_cpus();
+
+ if (!invoke || !sp->startup.multi)
+ goto add_node;
+
+ /*
+ * Try to call the startup callback for each present cpu
+ * depending on the hotplug state of the cpu.
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate < state)
+ continue;
+
+ ret = cpuhp_issue_call(cpu, state, true, node);
+ if (ret) {
+ if (sp->teardown.multi)
+ cpuhp_rollback_install(cpu, state, node);
+ goto err;
+ }
+ }
+add_node:
+ ret = 0;
+ mutex_lock(&cpuhp_state_mutex);
+ hlist_add_head(node, &sp->list);
+ mutex_unlock(&cpuhp_state_mutex);
+
+err:
+ put_online_cpus();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
+
/**
* __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
* @state: The state to setup
@@ -1484,7 +1591,8 @@
int __cpuhp_setup_state(enum cpuhp_state state,
const char *name, bool invoke,
int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu))
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
int cpu, ret = 0;
int dyn_state = 0;
@@ -1503,7 +1611,7 @@
state = ret;
}
- cpuhp_store_callbacks(state, name, startup, teardown);
+ cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
if (!invoke || !startup)
goto out;
@@ -1519,10 +1627,11 @@
if (cpustate < state)
continue;
- ret = cpuhp_issue_call(cpu, state, startup, true);
+ ret = cpuhp_issue_call(cpu, state, true, NULL);
if (ret) {
- cpuhp_rollback_install(cpu, state, teardown);
- cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ if (teardown)
+ cpuhp_rollback_install(cpu, state, NULL);
+ cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
goto out;
}
}
@@ -1534,6 +1643,42 @@
}
EXPORT_SYMBOL(__cpuhp_setup_state);
+int __cpuhp_state_remove_instance(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke)
+{
+ struct cpuhp_step *sp = cpuhp_get_step(state);
+ int cpu;
+
+ BUG_ON(cpuhp_cb_check(state));
+
+ if (!sp->multi_instance)
+ return -EINVAL;
+
+ get_online_cpus();
+ if (!invoke || !cpuhp_get_teardown_cb(state))
+ goto remove;
+ /*
+ * Call the teardown callback for each present cpu depending
+ * on the hotplug state of the cpu. This function is not
+ * allowed to fail currently!
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate >= state)
+ cpuhp_issue_call(cpu, state, false, node);
+ }
+
+remove:
+ mutex_lock(&cpuhp_state_mutex);
+ hlist_del(node);
+ mutex_unlock(&cpuhp_state_mutex);
+ put_online_cpus();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
* __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
* @state: The state to remove
@@ -1545,14 +1690,21 @@
*/
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
- int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+ struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
get_online_cpus();
- if (!invoke || !teardown)
+ if (sp->multi_instance) {
+ WARN(!hlist_empty(&sp->list),
+ "Error: Removing state %d which has instances left.\n",
+ state);
+ goto remove;
+ }
+
+ if (!invoke || !cpuhp_get_teardown_cb(state))
goto remove;
/*
@@ -1565,10 +1717,10 @@
int cpustate = st->state;
if (cpustate >= state)
- cpuhp_issue_call(cpu, state, teardown, false);
+ cpuhp_issue_call(cpu, state, false, NULL);
}
remove:
- cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
put_online_cpus();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c27e533..2b4c20a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -325,8 +325,7 @@
/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus. The top
- * cpuset always has some cpus online.
+ * until we find one that does have some online cpus.
*
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask.
@@ -335,8 +334,20 @@
*/
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
+ while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
cs = parent_cs(cs);
+ if (unlikely(!cs)) {
+ /*
+ * The top cpuset doesn't have any online cpu as a
+ * consequence of a race between cpuset_hotplug_work
+ * and cpu hotplug notifier. But we know the top
+ * cpuset's effective_cpus is on its way to to be
+ * identical to cpu_online_mask.
+ */
+ cpumask_copy(pmask, cpu_online_mask);
+ return;
+ }
+ }
cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
}
@@ -2074,7 +2085,7 @@
* which could have been changed by cpuset just after it inherits the
* state from the parent and before it sits on the cgroup's task list.
*/
-void cpuset_fork(struct task_struct *task)
+static void cpuset_fork(struct task_struct *task)
{
if (task_css_is_root(task, cpuset_cgrp_id))
return;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a54f2c2..7c0d263 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1475,8 +1475,7 @@
if (event->group_leader == event) {
struct list_head *list;
- if (is_software_event(event))
- event->group_flags |= PERF_GROUP_SOFTWARE;
+ event->group_caps = event->event_caps;
list = ctx_group_list(event, ctx);
list_add_tail(&event->group_entry, list);
@@ -1630,9 +1629,7 @@
WARN_ON_ONCE(group_leader->ctx != event->ctx);
- if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
- !is_software_event(event))
- group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+ group_leader->group_caps &= event->event_caps;
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
@@ -1723,7 +1720,7 @@
sibling->group_leader = sibling;
/* Inherit group flags from the previous leader */
- sibling->group_flags = event->group_flags;
+ sibling->group_caps = event->group_caps;
WARN_ON_ONCE(sibling->ctx != event->ctx);
}
@@ -1832,6 +1829,8 @@
struct perf_event *event;
int state = group_event->state;
+ perf_pmu_disable(ctx->pmu);
+
event_sched_out(group_event, cpuctx, ctx);
/*
@@ -1840,6 +1839,8 @@
list_for_each_entry(event, &group_event->sibling_list, group_entry)
event_sched_out(event, cpuctx, ctx);
+ perf_pmu_enable(ctx->pmu);
+
if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
@@ -2145,7 +2146,7 @@
/*
* Groups consisting entirely of software events can always go on.
*/
- if (event->group_flags & PERF_GROUP_SOFTWARE)
+ if (event->group_caps & PERF_EV_CAP_SOFTWARE)
return 1;
/*
* If an exclusive group is already on, no other hardware
@@ -2491,7 +2492,7 @@
* while restarting.
*/
if (sd->restart)
- event->pmu->start(event, PERF_EF_START);
+ event->pmu->start(event, 0);
return 0;
}
@@ -2837,19 +2838,36 @@
}
}
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
void perf_sched_cb_dec(struct pmu *pmu)
{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
this_cpu_dec(perf_sched_cb_usages);
+
+ if (!--cpuctx->sched_cb_usage)
+ list_del(&cpuctx->sched_cb_entry);
}
+
void perf_sched_cb_inc(struct pmu *pmu)
{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ if (!cpuctx->sched_cb_usage++)
+ list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
this_cpu_inc(perf_sched_cb_usages);
}
/*
* This function provides the context switch callback to the lower code
* layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
*/
static void perf_pmu_sched_task(struct task_struct *prev,
struct task_struct *next,
@@ -2857,34 +2875,24 @@
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
- unsigned long flags;
if (prev == next)
return;
- local_irq_save(flags);
+ list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+ pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */
- rcu_read_lock();
+ if (WARN_ON_ONCE(!pmu->sched_task))
+ continue;
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- if (pmu->sched_task) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ perf_pmu_disable(pmu);
- perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ pmu->sched_task(cpuctx->task_ctx, sched_in);
- perf_pmu_disable(pmu);
-
- pmu->sched_task(cpuctx->task_ctx, sched_in);
-
- perf_pmu_enable(pmu);
-
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
- }
+ perf_pmu_enable(pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
-
- rcu_read_unlock();
-
- local_irq_restore(flags);
}
static void perf_event_switch(struct task_struct *task,
@@ -3416,6 +3424,22 @@
int ret;
};
+static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+{
+ int event_cpu = event->oncpu;
+ u16 local_pkg, event_pkg;
+
+ if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
+ event_pkg = topology_physical_package_id(event_cpu);
+ local_pkg = topology_physical_package_id(local_cpu);
+
+ if (event_pkg == local_pkg)
+ return local_cpu;
+ }
+
+ return event_cpu;
+}
+
/*
* Cross CPU call to read the hardware event
*/
@@ -3537,7 +3561,7 @@
static int perf_event_read(struct perf_event *event, bool group)
{
- int ret = 0;
+ int ret = 0, cpu_to_read, local_cpu;
/*
* If event is enabled and currently active on a CPU, update the
@@ -3549,6 +3573,11 @@
.group = group,
.ret = 0,
};
+
+ local_cpu = get_cpu();
+ cpu_to_read = find_cpu_to_read(event, local_cpu);
+ put_cpu();
+
/*
* Purposely ignore the smp_call_function_single() return
* value.
@@ -3559,7 +3588,7 @@
* Therefore, either way, we'll have an up-to-date event count
* after this.
*/
- (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+ (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
@@ -3929,7 +3958,7 @@
static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
{
- if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+ if ((e1->pmu == e2->pmu) &&
(e1->cpu == e2->cpu ||
e1->cpu == -1 ||
e2->cpu == -1))
@@ -5350,9 +5379,10 @@
struct pt_regs *regs, u64 mask)
{
int bit;
+ DECLARE_BITMAP(_mask, 64);
- for_each_set_bit(bit, (const unsigned long *) &mask,
- sizeof(mask) * BITS_PER_BYTE) {
+ bitmap_from_u64(_mask, mask);
+ for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
u64 val;
val = perf_reg_value(regs, bit);
@@ -9505,6 +9535,9 @@
goto err_alloc;
}
+ if (pmu->task_ctx_nr == perf_sw_context)
+ event->event_caps |= PERF_EV_CAP_SOFTWARE;
+
if (group_leader &&
(is_software_event(event) != is_software_event(group_leader))) {
if (is_software_event(event)) {
@@ -9518,7 +9551,7 @@
*/
pmu = group_leader->pmu;
} else if (is_software_event(group_leader) &&
- (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+ (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
/*
* In case the group is a pure software group, and we
* try to add a hardware event, move the whole group to
@@ -10453,6 +10486,8 @@
INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
+
+ INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c50276..d4129bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -150,7 +150,7 @@
* Returns 0 on success, -EFAULT on failure.
*/
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
- struct page *page, struct page *kpage)
+ struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
@@ -161,49 +161,49 @@
const unsigned long mmun_end = addr + PAGE_SIZE;
struct mem_cgroup *memcg;
- err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+ err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
false);
if (err)
return err;
/* For try_to_free_swap() and munlock_vma_page() below */
- lock_page(page);
+ lock_page(old_page);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
err = -EAGAIN;
- ptep = page_check_address(page, mm, addr, &ptl, 0);
+ ptep = page_check_address(old_page, mm, addr, &ptl, 0);
if (!ptep) {
- mem_cgroup_cancel_charge(kpage, memcg, false);
+ mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
- get_page(kpage);
- page_add_new_anon_rmap(kpage, vma, addr, false);
- mem_cgroup_commit_charge(kpage, memcg, false, false);
- lru_cache_add_active_or_unevictable(kpage, vma);
+ get_page(new_page);
+ page_add_new_anon_rmap(new_page, vma, addr, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
- if (!PageAnon(page)) {
- dec_mm_counter(mm, mm_counter_file(page));
+ if (!PageAnon(old_page)) {
+ dec_mm_counter(mm, mm_counter_file(old_page));
inc_mm_counter(mm, MM_ANONPAGES);
}
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+ set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
- page_remove_rmap(page, false);
- if (!page_mapped(page))
- try_to_free_swap(page);
+ page_remove_rmap(old_page, false);
+ if (!page_mapped(old_page))
+ try_to_free_swap(old_page);
pte_unmap_unlock(ptep, ptl);
if (vma->vm_flags & VM_LOCKED)
- munlock_vma_page(page);
- put_page(page);
+ munlock_vma_page(old_page);
+ put_page(old_page);
err = 0;
unlock:
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- unlock_page(page);
+ unlock_page(old_page);
return err;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78b..1e1d913 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@
static inline void check_stack_usage(void) {}
#endif
-void do_exit(long code)
+void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
@@ -882,29 +882,7 @@
exit_rcu();
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
- /*
- * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
- * when the following two conditions become true.
- * - There is race condition of mmap_sem (It is acquired by
- * exit_mm()), and
- * - SMI occurs before setting TASK_RUNINNG.
- * (or hypervisor of virtual machine switches to other guest)
- * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
- *
- * To avoid it, we have to wait for releasing tsk->pi_lock which
- * is held by try_to_wake_up()
- */
- smp_mb();
- raw_spin_unlock_wait(&tsk->pi_lock);
-
- /* causes final put_task_struct in finish_task_switch(). */
- tsk->state = TASK_DEAD;
- tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
- schedule();
- BUG();
- /* Avoid "noreturn function does return". */
- for (;;)
- cpu_relax(); /* For when BUG is null */
+ do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
diff --git a/kernel/fork.c b/kernel/fork.c
index beb3172..c060c7e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,19 +158,83 @@
* Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
* kmemcache based allocator.
*/
-# if THREAD_SIZE >= PAGE_SIZE
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
+ * flush. Try to minimize the number of calls by caching stacks.
+ */
+#define NR_CACHED_STACKS 2
+static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+#endif
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
+#ifdef CONFIG_VMAP_STACK
+ void *stack;
+ int i;
+
+ local_irq_disable();
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+
+ if (!s)
+ continue;
+ this_cpu_write(cached_stacks[i], NULL);
+
+ tsk->stack_vm_area = s;
+ local_irq_enable();
+ return s->addr;
+ }
+ local_irq_enable();
+
+ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP | __GFP_HIGHMEM,
+ PAGE_KERNEL,
+ 0, node, __builtin_return_address(0));
+
+ /*
+ * We can't call find_vm_area() in interrupt context, and
+ * free_thread_stack() can be called in interrupt context,
+ * so cache the vm_struct.
+ */
+ if (stack)
+ tsk->stack_vm_area = find_vm_area(stack);
+ return stack;
+#else
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
+#endif
}
-static inline void free_thread_stack(unsigned long *stack)
+static inline void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+#ifdef CONFIG_VMAP_STACK
+ if (task_stack_vm_area(tsk)) {
+ unsigned long flags;
+ int i;
+
+ local_irq_save(flags);
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_read(cached_stacks[i]))
+ continue;
+
+ this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
+ local_irq_restore(flags);
+ return;
+ }
+ local_irq_restore(flags);
+
+ vfree(tsk->stack);
+ return;
+ }
+#endif
+
+ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_stack_cache;
@@ -181,9 +245,9 @@
return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}
-static void free_thread_stack(unsigned long *stack)
+static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, stack);
+ kmem_cache_free(thread_stack_cache, tsk->stack);
}
void thread_stack_cache_init(void)
@@ -213,24 +277,76 @@
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-static void account_kernel_stack(unsigned long *stack, int account)
+static void account_kernel_stack(struct task_struct *tsk, int account)
{
- /* All stack pages are in the same zone and belong to the same memcg. */
- struct page *first_page = virt_to_page(stack);
+ void *stack = task_stack_page(tsk);
+ struct vm_struct *vm = task_stack_vm_area(tsk);
- mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
- THREAD_SIZE / 1024 * account);
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
- memcg_kmem_update_page_stat(
- first_page, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ if (vm) {
+ int i;
+
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ mod_zone_page_state(page_zone(vm->pages[i]),
+ NR_KERNEL_STACK_KB,
+ PAGE_SIZE / 1024 * account);
+ }
+
+ /* All stack pages belong to the same memcg. */
+ memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
+ } else {
+ /*
+ * All stack pages are in the same zone and belong to the
+ * same memcg.
+ */
+ struct page *first_page = virt_to_page(stack);
+
+ mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+ THREAD_SIZE / 1024 * account);
+
+ memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
+ }
}
+static void release_task_stack(struct task_struct *tsk)
+{
+ account_kernel_stack(tsk, -1);
+ arch_release_thread_stack(tsk->stack);
+ free_thread_stack(tsk);
+ tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+ tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+ if (atomic_dec_and_test(&tsk->stack_refcount))
+ release_task_stack(tsk);
+}
+#endif
+
void free_task(struct task_struct *tsk)
{
- account_kernel_stack(tsk->stack, -1);
- arch_release_thread_stack(tsk->stack);
- free_thread_stack(tsk->stack);
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+ /*
+ * The task is finally done with both the stack and thread_info,
+ * so free both.
+ */
+ release_task_stack(tsk);
+#else
+ /*
+ * If the task had a separate stack allocation, it should be gone
+ * by now.
+ */
+ WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
@@ -342,6 +458,7 @@
{
struct task_struct *tsk;
unsigned long *stack;
+ struct vm_struct *stack_vm_area;
int err;
if (node == NUMA_NO_NODE)
@@ -354,11 +471,26 @@
if (!stack)
goto free_tsk;
+ stack_vm_area = task_stack_vm_area(tsk);
+
err = arch_dup_task_struct(tsk, orig);
+
+ /*
+ * arch_dup_task_struct() clobbers the stack-related fields. Make
+ * sure they're properly initialized before using any stack-related
+ * functions again.
+ */
+ tsk->stack = stack;
+#ifdef CONFIG_VMAP_STACK
+ tsk->stack_vm_area = stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ atomic_set(&tsk->stack_refcount, 1);
+#endif
+
if (err)
goto free_stack;
- tsk->stack = stack;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -390,14 +522,14 @@
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
- account_kernel_stack(stack, 1);
+ account_kernel_stack(tsk, 1);
kcov_task_init(tsk);
return tsk;
free_stack:
- free_thread_stack(stack);
+ free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
return NULL;
@@ -1715,6 +1847,7 @@
atomic_dec(&p->cred->user->processes);
exit_creds(p);
bad_fork_free:
+ put_task_stack(p);
free_task(p);
fork_out:
return ERR_PTR(retval);
diff --git a/kernel/futex.c b/kernel/futex.c
index 46cb3a3..2c4be46 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,8 +381,12 @@
#endif
}
-/*
- * We hash on the keys returned from get_futex_key (see below).
+/**
+ * hash_futex - Return the hash bucket in the global hash
+ * @key: Pointer to the futex key for which the hash is calculated
+ *
+ * We hash on the keys returned from get_futex_key (see below) and return the
+ * corresponding hash bucket in the global hash.
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
@@ -392,7 +396,12 @@
return &futex_queues[hash & (futex_hashsize - 1)];
}
-/*
+
+/**
+ * match_futex - Check whether two futex keys are equal
+ * @key1: Pointer to key1
+ * @key2: Pointer to key2
+ *
* Return 1 if two futex_keys are equal, 0 otherwise.
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index d234022..432c3d7 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -117,7 +117,7 @@
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
- debug_show_held_locks(t);
+ debug_show_all_locks();
touch_nmi_watchdog();
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 32f6cfc..17f51d63 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -4,60 +4,151 @@
#include <linux/slab.h>
#include <linux/cpu.h>
-static int get_first_sibling(unsigned int cpu)
+static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+ int cpus_per_vec)
{
- unsigned int ret;
+ const struct cpumask *siblmsk;
+ int cpu, sibl;
- ret = cpumask_first(topology_sibling_cpumask(cpu));
- if (ret < nr_cpu_ids)
- return ret;
- return cpu;
+ for ( ; cpus_per_vec > 0; ) {
+ cpu = cpumask_first(nmsk);
+
+ /* Should not happen, but I'm too lazy to think about it */
+ if (cpu >= nr_cpu_ids)
+ return;
+
+ cpumask_clear_cpu(cpu, nmsk);
+ cpumask_set_cpu(cpu, irqmsk);
+ cpus_per_vec--;
+
+ /* If the cpu has siblings, use them first */
+ siblmsk = topology_sibling_cpumask(cpu);
+ for (sibl = -1; cpus_per_vec > 0; ) {
+ sibl = cpumask_next(sibl, siblmsk);
+ if (sibl >= nr_cpu_ids)
+ break;
+ if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+ continue;
+ cpumask_set_cpu(sibl, irqmsk);
+ cpus_per_vec--;
+ }
+ }
}
-/*
- * Take a map of online CPUs and the number of available interrupt vectors
- * and generate an output cpumask suitable for spreading MSI/MSI-X vectors
- * so that they are distributed as good as possible around the CPUs. If
- * more vectors than CPUs are available we'll map one to each CPU,
- * otherwise we map one to the first sibling of each socket.
- *
- * If there are more vectors than CPUs we will still only have one bit
- * set per CPU, but interrupt code will keep on assigning the vectors from
- * the start of the bitmap until we run out of vectors.
- */
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
{
- struct cpumask *affinity_mask;
- unsigned int max_vecs = *nr_vecs;
+ int n, nodes;
- if (max_vecs == 1)
- return NULL;
-
- affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
- if (!affinity_mask) {
- *nr_vecs = 1;
- return NULL;
+ /* Calculate the number of nodes in the supplied affinity mask */
+ for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
+ if (cpumask_intersects(mask, cpumask_of_node(n))) {
+ node_set(n, *nodemsk);
+ nodes++;
+ }
}
+ return nodes;
+}
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @affinity: The affinity mask to spread. If NULL cpu_online_mask
+ * is used
+ * @nvecs: The number of vectors
+ *
+ * Returns the masks pointer or NULL if allocation failed.
+ */
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
+ int nvec)
+{
+ int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+ nodemask_t nodemsk = NODE_MASK_NONE;
+ struct cpumask *masks;
+ cpumask_var_t nmsk;
+
+ if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+ return NULL;
+
+ masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+ if (!masks)
+ goto out;
+
+ /* Stabilize the cpumasks */
get_online_cpus();
- if (max_vecs >= num_online_cpus()) {
- cpumask_copy(affinity_mask, cpu_online_mask);
- *nr_vecs = num_online_cpus();
- } else {
- unsigned int vecs = 0, cpu;
+ /* If the supplied affinity mask is NULL, use cpu online mask */
+ if (!affinity)
+ affinity = cpu_online_mask;
- for_each_online_cpu(cpu) {
- if (cpu == get_first_sibling(cpu)) {
- cpumask_set_cpu(cpu, affinity_mask);
- vecs++;
- }
+ nodes = get_nodes_in_cpumask(affinity, &nodemsk);
- if (--max_vecs == 0)
+ /*
+ * If the number of nodes in the mask is less than or equal the
+ * number of vectors we just spread the vectors across the nodes.
+ */
+ if (nvec <= nodes) {
+ for_each_node_mask(n, nodemsk) {
+ cpumask_copy(masks + curvec, cpumask_of_node(n));
+ if (++curvec == nvec)
break;
}
- *nr_vecs = vecs;
+ goto outonl;
}
- put_online_cpus();
- return affinity_mask;
+ /* Spread the vectors per node */
+ vecs_per_node = nvec / nodes;
+ /* Account for rounding errors */
+ extra_vecs = nvec - (nodes * vecs_per_node);
+
+ for_each_node_mask(n, nodemsk) {
+ int ncpus, v, vecs_to_assign = vecs_per_node;
+
+ /* Get the cpus on this node which are in the mask */
+ cpumask_and(nmsk, affinity, cpumask_of_node(n));
+
+ /* Calculate the number of cpus per vector */
+ ncpus = cpumask_weight(nmsk);
+
+ for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+ cpus_per_vec = ncpus / vecs_to_assign;
+
+ /* Account for extra vectors to compensate rounding errors */
+ if (extra_vecs) {
+ cpus_per_vec++;
+ if (!--extra_vecs)
+ vecs_per_node++;
+ }
+ irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
+ }
+
+ if (curvec >= nvec)
+ break;
+ }
+
+outonl:
+ put_online_cpus();
+out:
+ free_cpumask_var(nmsk);
+ return masks;
+}
+
+/**
+ * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
+ * @affinity: The affinity mask to spread. If NULL cpu_online_mask
+ * is used
+ * @maxvec: The maximum number of vectors available
+ */
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+ int cpus, ret;
+
+ /* Stabilize the cpumasks */
+ get_online_cpus();
+ /* If the supplied affinity mask is NULL, use cpu online mask */
+ if (!affinity)
+ affinity = cpu_online_mask;
+
+ cpus = cpumask_weight(affinity);
+ ret = (cpus < maxvec) ? cpus : maxvec;
+
+ put_online_cpus();
+ return ret;
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6373890..be3c34e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,7 +76,6 @@
if (!desc)
return -EINVAL;
- type &= IRQ_TYPE_SENSE_MASK;
ret = __irq_set_trigger(desc, type);
irq_put_desc_busunlock(desc, flags);
return ret;
@@ -756,7 +755,6 @@
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irqaction *action = desc->action;
- void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
@@ -765,15 +763,26 @@
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);
- trace_irq_handler_entry(irq, action);
- res = action->handler(irq, dev_id);
- trace_irq_handler_exit(irq, action, res);
+ if (likely(action)) {
+ trace_irq_handler_entry(irq, action);
+ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+ trace_irq_handler_exit(irq, action, res);
+ } else {
+ unsigned int cpu = smp_processor_id();
+ bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
+
+ if (enabled)
+ irq_percpu_disable(desc, cpu);
+
+ pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n",
+ enabled ? " and unmasked" : "", irq, cpu);
+ }
if (chip->irq_eoi)
chip->irq_eoi(&desc->irq_data);
}
-void
+static void
__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
int is_chained, const char *name)
{
@@ -820,6 +829,8 @@
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
+ unsigned int type = irqd_get_trigger_type(&desc->irq_data);
+
/*
* We're about to start this interrupt immediately,
* hence the need to set the trigger configuration.
@@ -828,8 +839,10 @@
* chained interrupt. Reset it immediately because we
* do know better.
*/
- __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
- desc->handle_irq = handle;
+ if (type != IRQ_TYPE_NONE) {
+ __irq_set_trigger(desc, type);
+ desc->handle_irq = handle;
+ }
irq_settings_set_noprobe(desc);
irq_settings_set_norequest(desc);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index abd286a..ee32870 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -260,9 +260,9 @@
}
/**
- * irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
+ * __irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
* @d: irq domain for which to allocate chips
- * @irqs_per_chip: Number of interrupts each chip handles
+ * @irqs_per_chip: Number of interrupts each chip handles (max 32)
* @num_ct: Number of irq_chip_type instances associated with this
* @name: Name of the irq chip
* @handler: Default flow handler associated with these chips
@@ -270,11 +270,11 @@
* @set: IRQ_* bits to set in the mapping function
* @gcflags: Generic chip specific setup flags
*/
-int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
- int num_ct, const char *name,
- irq_flow_handler_t handler,
- unsigned int clr, unsigned int set,
- enum irq_gc_flags gcflags)
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+ int num_ct, const char *name,
+ irq_flow_handler_t handler,
+ unsigned int clr, unsigned int set,
+ enum irq_gc_flags gcflags)
{
struct irq_domain_chip_generic *dgc;
struct irq_chip_generic *gc;
@@ -326,7 +326,21 @@
d->name = name;
return 0;
}
-EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);
+EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
+
+static struct irq_chip_generic *
+__irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
+{
+ struct irq_domain_chip_generic *dgc = d->gc;
+ int idx;
+
+ if (!dgc)
+ return ERR_PTR(-ENODEV);
+ idx = hw_irq / dgc->irqs_per_chip;
+ if (idx >= dgc->num_chips)
+ return ERR_PTR(-EINVAL);
+ return dgc->gc[idx];
+}
/**
* irq_get_domain_generic_chip - Get a pointer to the generic chip of a hw_irq
@@ -336,15 +350,9 @@
struct irq_chip_generic *
irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
{
- struct irq_domain_chip_generic *dgc = d->gc;
- int idx;
+ struct irq_chip_generic *gc = __irq_get_domain_generic_chip(d, hw_irq);
- if (!dgc)
- return NULL;
- idx = hw_irq / dgc->irqs_per_chip;
- if (idx >= dgc->num_chips)
- return NULL;
- return dgc->gc[idx];
+ return !IS_ERR(gc) ? gc : NULL;
}
EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
@@ -368,13 +376,9 @@
unsigned long flags;
int idx;
- if (!d->gc)
- return -ENODEV;
-
- idx = hw_irq / dgc->irqs_per_chip;
- if (idx >= dgc->num_chips)
- return -EINVAL;
- gc = dgc->gc[idx];
+ gc = __irq_get_domain_generic_chip(d, hw_irq);
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
idx = hw_irq % dgc->irqs_per_chip;
@@ -409,10 +413,30 @@
irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set);
return 0;
}
-EXPORT_SYMBOL_GPL(irq_map_generic_chip);
+
+static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+{
+ struct irq_data *data = irq_domain_get_irq_data(d, virq);
+ struct irq_domain_chip_generic *dgc = d->gc;
+ unsigned int hw_irq = data->hwirq;
+ struct irq_chip_generic *gc;
+ int irq_idx;
+
+ gc = irq_get_domain_generic_chip(d, hw_irq);
+ if (!gc)
+ return;
+
+ irq_idx = hw_irq % dgc->irqs_per_chip;
+
+ clear_bit(irq_idx, &gc->installed);
+ irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL,
+ NULL);
+
+}
struct irq_domain_ops irq_generic_chip_ops = {
.map = irq_map_generic_chip,
+ .unmap = irq_unmap_generic_chip,
.xlate = irq_domain_xlate_onetwocell,
};
EXPORT_SYMBOL_GPL(irq_generic_chip_ops);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a623b44..00bb0ae 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -15,6 +15,7 @@
#include <linux/radix-tree.h>
#include <linux/bitmap.h>
#include <linux/irqdomain.h>
+#include <linux/sysfs.h>
#include "internals.h"
@@ -123,6 +124,181 @@
#ifdef CONFIG_SPARSE_IRQ
+static void irq_kobj_release(struct kobject *kobj);
+
+#ifdef CONFIG_SYSFS
+static struct kobject *irq_kobj_base;
+
+#define IRQ_ATTR_RO(_name) \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+static ssize_t per_cpu_count_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ int cpu, irq = desc->irq_data.irq;
+ ssize_t ret = 0;
+ char *p = "";
+
+ for_each_possible_cpu(cpu) {
+ unsigned int c = kstat_irqs_cpu(irq, cpu);
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
+ p = ",";
+ }
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ return ret;
+}
+IRQ_ATTR_RO(per_cpu_count);
+
+static ssize_t chip_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ ssize_t ret = 0;
+
+ raw_spin_lock_irq(&desc->lock);
+ if (desc->irq_data.chip && desc->irq_data.chip->name) {
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n",
+ desc->irq_data.chip->name);
+ }
+ raw_spin_unlock_irq(&desc->lock);
+
+ return ret;
+}
+IRQ_ATTR_RO(chip_name);
+
+static ssize_t hwirq_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ ssize_t ret = 0;
+
+ raw_spin_lock_irq(&desc->lock);
+ if (desc->irq_data.domain)
+ ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
+ raw_spin_unlock_irq(&desc->lock);
+
+ return ret;
+}
+IRQ_ATTR_RO(hwirq);
+
+static ssize_t type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ ssize_t ret = 0;
+
+ raw_spin_lock_irq(&desc->lock);
+ ret = sprintf(buf, "%s\n",
+ irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
+ raw_spin_unlock_irq(&desc->lock);
+
+ return ret;
+
+}
+IRQ_ATTR_RO(type);
+
+static ssize_t name_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ ssize_t ret = 0;
+
+ raw_spin_lock_irq(&desc->lock);
+ if (desc->name)
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name);
+ raw_spin_unlock_irq(&desc->lock);
+
+ return ret;
+}
+IRQ_ATTR_RO(name);
+
+static ssize_t actions_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+ struct irqaction *action;
+ ssize_t ret = 0;
+ char *p = "";
+
+ raw_spin_lock_irq(&desc->lock);
+ for (action = desc->action; action != NULL; action = action->next) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ p, action->name);
+ p = ",";
+ }
+ raw_spin_unlock_irq(&desc->lock);
+
+ if (ret)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+ return ret;
+}
+IRQ_ATTR_RO(actions);
+
+static struct attribute *irq_attrs[] = {
+ &per_cpu_count_attr.attr,
+ &chip_name_attr.attr,
+ &hwirq_attr.attr,
+ &type_attr.attr,
+ &name_attr.attr,
+ &actions_attr.attr,
+ NULL
+};
+
+static struct kobj_type irq_kobj_type = {
+ .release = irq_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_attrs = irq_attrs,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc)
+{
+ if (irq_kobj_base) {
+ /*
+ * Continue even in case of failure as this is nothing
+ * crucial.
+ */
+ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
+ pr_warn("Failed to add kobject for irq %d\n", irq);
+ }
+}
+
+static int __init irq_sysfs_init(void)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ /* Prevent concurrent irq alloc/free */
+ irq_lock_sparse();
+
+ irq_kobj_base = kobject_create_and_add("irq", kernel_kobj);
+ if (!irq_kobj_base) {
+ irq_unlock_sparse();
+ return -ENOMEM;
+ }
+
+ /* Add the already allocated interrupts */
+ for_each_irq_desc(irq, desc)
+ irq_sysfs_add(irq, desc);
+ irq_unlock_sparse();
+
+ return 0;
+}
+postcore_initcall(irq_sysfs_init);
+
+#else /* !CONFIG_SYSFS */
+
+static struct kobj_type irq_kobj_type = {
+ .release = irq_kobj_release,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
+
+#endif /* CONFIG_SYSFS */
+
static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -187,6 +363,7 @@
desc_set_defaults(irq, desc, node, affinity, owner);
irqd_set(&desc->irq_data, flags);
+ kobject_init(&desc->kobj, &irq_kobj_type);
return desc;
@@ -197,15 +374,22 @@
return NULL;
}
-static void delayed_free_desc(struct rcu_head *rhp)
+static void irq_kobj_release(struct kobject *kobj)
{
- struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+ struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
free_masks(desc);
free_percpu(desc->kstat_irqs);
kfree(desc);
}
+static void delayed_free_desc(struct rcu_head *rhp)
+{
+ struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+
+ kobject_put(&desc->kobj);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -217,8 +401,12 @@
* kstat_irq_usr(). Once we deleted the descriptor from the
* sparse tree we can free it. Access in proc will fail to
* lookup the descriptor.
+ *
+ * The sysfs entry must be serialized against a concurrent
+ * irq_sysfs_init() as well.
*/
mutex_lock(&sparse_irq_lock);
+ kobject_del(&desc->kobj);
delete_irq_desc(irq);
mutex_unlock(&sparse_irq_lock);
@@ -236,31 +424,31 @@
const struct cpumask *mask = NULL;
struct irq_desc *desc;
unsigned int flags;
- int i, cpu = -1;
+ int i;
- if (affinity && cpumask_empty(affinity))
- return -EINVAL;
+ /* Validate affinity mask(s) */
+ if (affinity) {
+ for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+ if (cpumask_empty(mask))
+ return -EINVAL;
+ }
+ }
flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+ mask = NULL;
for (i = 0; i < cnt; i++) {
if (affinity) {
- cpu = cpumask_next(cpu, affinity);
- if (cpu >= nr_cpu_ids)
- cpu = cpumask_first(affinity);
- node = cpu_to_node(cpu);
-
- /*
- * For single allocations we use the caller provided
- * mask otherwise we use the mask of the target cpu
- */
- mask = cnt == 1 ? affinity : cpumask_of(cpu);
+ node = cpu_to_node(cpumask_first(affinity));
+ mask = affinity;
+ affinity++;
}
desc = alloc_desc(start + i, node, flags, mask, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
irq_insert_desc(start + i, desc);
+ irq_sysfs_add(start + i, desc);
mutex_unlock(&sparse_irq_lock);
}
return start;
@@ -481,9 +669,9 @@
* @cnt: Number of consecutive irqs to allocate.
* @node: Preferred node on which the irq descriptor should be allocated
* @owner: Owning module (can be NULL)
- * @affinity: Optional pointer to an affinity mask which hints where the
- * irq descriptors should be allocated and which default
- * affinities to use
+ * @affinity: Optional pointer to an affinity mask array of size @cnt which
+ * hints where the irq descriptors should be allocated and which
+ * default affinities to use
*
* Returns the first irq number or error code
*/
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4752b43..8c0a0ae 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -80,7 +80,7 @@
/**
* __irq_domain_add() - Allocate a new irq_domain data structure
- * @of_node: optional device-tree node of the interrupt controller
+ * @fwnode: firmware node for the interrupt controller
* @size: Size of linear map; 0 for radix mapping only
* @hwirq_max: Maximum number of interrupts supported by controller
* @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
@@ -96,10 +96,8 @@
const struct irq_domain_ops *ops,
void *host_data)
{
+ struct device_node *of_node = to_of_node(fwnode);
struct irq_domain *domain;
- struct device_node *of_node;
-
- of_node = to_of_node(fwnode);
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
@@ -868,7 +866,10 @@
if (WARN_ON(intsize < 1))
return -EINVAL;
*out_hwirq = intspec[0];
- *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE;
+ if (intsize > 1)
+ *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+ else
+ *out_type = IRQ_TYPE_NONE;
return 0;
}
EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9530fcd..0c5f1a5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -669,8 +669,6 @@
return 0;
}
- flags &= IRQ_TYPE_SENSE_MASK;
-
if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
if (!irqd_irq_masked(&desc->irq_data))
mask_irq(desc);
@@ -678,7 +676,8 @@
unmask = 1;
}
- /* caller masked out all except trigger mode flags */
+ /* Mask all flags except trigger mode */
+ flags &= IRQ_TYPE_SENSE_MASK;
ret = chip->irq_set_type(&desc->irq_data, flags);
switch (ret) {
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 19e9dfb..8a3e8727 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -18,20 +18,42 @@
/* Temparory solution for building, will be removed later */
#include <linux/pci.h>
-struct msi_desc *alloc_msi_entry(struct device *dev)
+/**
+ * alloc_msi_entry - Allocate an initialize msi_entry
+ * @dev: Pointer to the device for which this is allocated
+ * @nvec: The number of vectors used in this entry
+ * @affinity: Optional pointer to an affinity mask array size of @nvec
+ *
+ * If @affinity is not NULL then a an affinity array[@nvec] is allocated
+ * and the affinity masks from @affinity are copied.
+ */
+struct msi_desc *
+alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
{
- struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ struct msi_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return NULL;
INIT_LIST_HEAD(&desc->list);
desc->dev = dev;
+ desc->nvec_used = nvec;
+ if (affinity) {
+ desc->affinity = kmemdup(affinity,
+ nvec * sizeof(*desc->affinity), GFP_KERNEL);
+ if (!desc->affinity) {
+ kfree(desc);
+ return NULL;
+ }
+ }
return desc;
}
void free_msi_entry(struct msi_desc *entry)
{
+ kfree(entry->affinity);
kfree(entry);
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9ff173d..4ab4c37 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -64,7 +64,7 @@
static struct kthread *to_live_kthread(struct task_struct *k)
{
struct completion *vfork = ACCESS_ONCE(k->vfork_done);
- if (likely(vfork))
+ if (likely(vfork) && try_get_task_stack(k))
return __to_kthread(vfork);
return NULL;
}
@@ -425,8 +425,10 @@
{
struct kthread *kthread = to_live_kthread(k);
- if (kthread)
+ if (kthread) {
__kthread_unpark(k, kthread);
+ put_task_stack(k);
+ }
}
EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -455,6 +457,7 @@
wait_for_completion(&kthread->parked);
}
}
+ put_task_stack(k);
ret = 0;
}
return ret;
@@ -490,6 +493,7 @@
__kthread_unpark(k, kthread);
wake_up_process(k);
wait_for_completion(&kthread->exited);
+ put_task_stack(k);
}
ret = k->exit_code;
put_task_struct(k);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 31322a4..6f88e35 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -18,7 +18,6 @@
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
-obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
deleted file mode 100644
index 951cfcd..0000000
--- a/kernel/locking/lglock.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/* See include/linux/lglock.h for description */
-#include <linux/module.h>
-#include <linux/lglock.h>
-#include <linux/cpu.h>
-#include <linux/string.h>
-
-/*
- * Note there is no uninit, so lglocks cannot be defined in
- * modules (but it's fine to use them from there)
- * Could be added though, just undo lg_lock_init
- */
-
-void lg_lock_init(struct lglock *lg, char *name)
-{
- LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
-}
-EXPORT_SYMBOL(lg_lock_init);
-
-void lg_local_lock(struct lglock *lg)
-{
- arch_spinlock_t *lock;
-
- preempt_disable();
- lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
- lock = this_cpu_ptr(lg->lock);
- arch_spin_lock(lock);
-}
-EXPORT_SYMBOL(lg_local_lock);
-
-void lg_local_unlock(struct lglock *lg)
-{
- arch_spinlock_t *lock;
-
- lock_release(&lg->lock_dep_map, 1, _RET_IP_);
- lock = this_cpu_ptr(lg->lock);
- arch_spin_unlock(lock);
- preempt_enable();
-}
-EXPORT_SYMBOL(lg_local_unlock);
-
-void lg_local_lock_cpu(struct lglock *lg, int cpu)
-{
- arch_spinlock_t *lock;
-
- preempt_disable();
- lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
- lock = per_cpu_ptr(lg->lock, cpu);
- arch_spin_lock(lock);
-}
-EXPORT_SYMBOL(lg_local_lock_cpu);
-
-void lg_local_unlock_cpu(struct lglock *lg, int cpu)
-{
- arch_spinlock_t *lock;
-
- lock_release(&lg->lock_dep_map, 1, _RET_IP_);
- lock = per_cpu_ptr(lg->lock, cpu);
- arch_spin_unlock(lock);
- preempt_enable();
-}
-EXPORT_SYMBOL(lg_local_unlock_cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
- BUG_ON(cpu1 == cpu2);
-
- /* lock in cpu order, just like lg_global_lock */
- if (cpu2 < cpu1)
- swap(cpu1, cpu2);
-
- preempt_disable();
- lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
- arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
- arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
- lock_release(&lg->lock_dep_map, 1, _RET_IP_);
- arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
- arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
- preempt_enable();
-}
-
-void lg_global_lock(struct lglock *lg)
-{
- int i;
-
- preempt_disable();
- lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
- for_each_possible_cpu(i) {
- arch_spinlock_t *lock;
- lock = per_cpu_ptr(lg->lock, i);
- arch_spin_lock(lock);
- }
-}
-EXPORT_SYMBOL(lg_global_lock);
-
-void lg_global_unlock(struct lglock *lg)
-{
- int i;
-
- lock_release(&lg->lock_dep_map, 1, _RET_IP_);
- for_each_possible_cpu(i) {
- arch_spinlock_t *lock;
- lock = per_cpu_ptr(lg->lock, i);
- arch_spin_unlock(lock);
- }
- preempt_enable();
-}
-EXPORT_SYMBOL(lg_global_unlock);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index bec0b64..ce18259 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -8,152 +8,186 @@
#include <linux/sched.h>
#include <linux/errno.h>
-int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{
- brw->fast_read_ctr = alloc_percpu(int);
- if (unlikely(!brw->fast_read_ctr))
+ sem->read_count = alloc_percpu(int);
+ if (unlikely(!sem->read_count))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
- __init_rwsem(&brw->rw_sem, name, rwsem_key);
- rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
- atomic_set(&brw->slow_read_ctr, 0);
- init_waitqueue_head(&brw->write_waitq);
+ rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+ __init_rwsem(&sem->rw_sem, name, rwsem_key);
+ init_waitqueue_head(&sem->writer);
+ sem->readers_block = 0;
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
-void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
/*
* XXX: temporary kludge. The error path in alloc_super()
* assumes that percpu_free_rwsem() is safe after kzalloc().
*/
- if (!brw->fast_read_ctr)
+ if (!sem->read_count)
return;
- rcu_sync_dtor(&brw->rss);
- free_percpu(brw->fast_read_ctr);
- brw->fast_read_ctr = NULL; /* catch use after free bugs */
+ rcu_sync_dtor(&sem->rss);
+ free_percpu(sem->read_count);
+ sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
-/*
- * This is the fast-path for down_read/up_read. If it succeeds we rely
- * on the barriers provided by rcu_sync_enter/exit; see the comments in
- * percpu_down_write() and percpu_up_write().
- *
- * If this helper fails the callers rely on the normal rw_semaphore and
- * atomic_dec_and_test(), so in this case we have the necessary barriers.
- */
-static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
-{
- bool success;
-
- preempt_disable();
- success = rcu_sync_is_idle(&brw->rss);
- if (likely(success))
- __this_cpu_add(*brw->fast_read_ctr, val);
- preempt_enable();
-
- return success;
-}
-
-/*
- * Like the normal down_read() this is not recursive, the writer can
- * come after the first percpu_down_read() and create the deadlock.
- *
- * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
- * percpu_up_read() does rwsem_release(). This pairs with the usage
- * of ->rw_sem in percpu_down/up_write().
- */
-void percpu_down_read(struct percpu_rw_semaphore *brw)
-{
- might_sleep();
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
-
- if (likely(update_fast_ctr(brw, +1)))
- return;
-
- /* Avoid rwsem_acquire_read() and rwsem_release() */
- __down_read(&brw->rw_sem);
- atomic_inc(&brw->slow_read_ctr);
- __up_read(&brw->rw_sem);
-}
-EXPORT_SYMBOL_GPL(percpu_down_read);
-
-int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
-{
- if (unlikely(!update_fast_ctr(brw, +1))) {
- if (!__down_read_trylock(&brw->rw_sem))
- return 0;
- atomic_inc(&brw->slow_read_ctr);
- __up_read(&brw->rw_sem);
- }
-
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
- return 1;
-}
-
-void percpu_up_read(struct percpu_rw_semaphore *brw)
-{
- rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
-
- if (likely(update_fast_ctr(brw, -1)))
- return;
-
- /* false-positive is possible but harmless */
- if (atomic_dec_and_test(&brw->slow_read_ctr))
- wake_up_all(&brw->write_waitq);
-}
-EXPORT_SYMBOL_GPL(percpu_up_read);
-
-static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
-{
- unsigned int sum = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- sum += per_cpu(*brw->fast_read_ctr, cpu);
- per_cpu(*brw->fast_read_ctr, cpu) = 0;
- }
-
- return sum;
-}
-
-void percpu_down_write(struct percpu_rw_semaphore *brw)
+int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
/*
- * Make rcu_sync_is_idle() == F and thus disable the fast-path in
- * percpu_down_read() and percpu_up_read(), and wait for gp pass.
+ * Due to having preemption disabled the decrement happens on
+ * the same CPU as the increment, avoiding the
+ * increment-on-one-CPU-and-decrement-on-another problem.
*
- * The latter synchronises us with the preceding readers which used
- * the fast-past, so we can not miss the result of __this_cpu_add()
- * or anything else inside their criticial sections.
+ * If the reader misses the writer's assignment of readers_block, then
+ * the writer is guaranteed to see the reader's increment.
+ *
+ * Conversely, any readers that increment their sem->read_count after
+ * the writer looks are guaranteed to see the readers_block value,
+ * which in turn means that they are guaranteed to immediately
+ * decrement their sem->read_count, so that it doesn't matter that the
+ * writer missed them.
*/
- rcu_sync_enter(&brw->rss);
- /* exclude other writers, and block the new readers completely */
- down_write(&brw->rw_sem);
+ smp_mb(); /* A matches D */
- /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
- atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+ /*
+ * If !readers_block the critical section starts here, matched by the
+ * release in percpu_up_write().
+ */
+ if (likely(!smp_load_acquire(&sem->readers_block)))
+ return 1;
- /* wait for all readers to complete their percpu_up_read() */
- wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+ /*
+ * Per the above comment; we still have preemption disabled and
+ * will thus decrement on the same CPU as we incremented.
+ */
+ __percpu_up_read(sem);
+
+ if (try)
+ return 0;
+
+ /*
+ * We either call schedule() in the wait, or we'll fall through
+ * and reschedule on the preempt_enable() in percpu_down_read().
+ */
+ preempt_enable_no_resched();
+
+ /*
+ * Avoid lockdep for the down/up_read() we already have them.
+ */
+ __down_read(&sem->rw_sem);
+ this_cpu_inc(*sem->read_count);
+ __up_read(&sem->rw_sem);
+
+ preempt_disable();
+ return 1;
+}
+EXPORT_SYMBOL_GPL(__percpu_down_read);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to aggregate
+ * zero, as that is the only time it matters) they will also see our
+ * critical section.
+ */
+ __this_cpu_dec(*sem->read_count);
+
+ /* Prod writer to recheck readers_active */
+ wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
+
+#define per_cpu_sum(var) \
+({ \
+ typeof(var) __sum = 0; \
+ int cpu; \
+ compiletime_assert_atomic_type(__sum); \
+ for_each_possible_cpu(cpu) \
+ __sum += per_cpu(var, cpu); \
+ __sum; \
+})
+
+/*
+ * Return true if the modular sum of the sem->read_count per-CPU variable is
+ * zero. If this sum is zero, then it is stable due to the fact that if any
+ * newly arriving readers increment a given counter, they will immediately
+ * decrement that same counter.
+ */
+static bool readers_active_check(struct percpu_rw_semaphore *sem)
+{
+ if (per_cpu_sum(*sem->read_count) != 0)
+ return false;
+
+ /*
+ * If we observed the decrement; ensure we see the entire critical
+ * section.
+ */
+
+ smp_mb(); /* C matches B */
+
+ return true;
+}
+
+void percpu_down_write(struct percpu_rw_semaphore *sem)
+{
+ /* Notify readers to take the slow path. */
+ rcu_sync_enter(&sem->rss);
+
+ down_write(&sem->rw_sem);
+
+ /*
+ * Notify new readers to block; up until now, and thus throughout the
+ * longish rcu_sync_enter() above, new readers could still come in.
+ */
+ WRITE_ONCE(sem->readers_block, 1);
+
+ smp_mb(); /* D matches A */
+
+ /*
+ * If they don't see our writer of readers_block, then we are
+ * guaranteed to see their sem->read_count increment, and therefore
+ * will wait for them.
+ */
+
+ /* Wait for all now active readers to complete. */
+ wait_event(sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
-void percpu_up_write(struct percpu_rw_semaphore *brw)
+void percpu_up_write(struct percpu_rw_semaphore *sem)
{
- /* release the lock, but the readers can't use the fast-path */
- up_write(&brw->rw_sem);
/*
- * Enable the fast-path in percpu_down_read() and percpu_up_read()
- * but only after another gp pass; this adds the necessary barrier
- * to ensure the reader can't miss the changes done by us.
+ * Signal the writer is done, no fast path yet.
+ *
+ * One reason that we cannot just immediately flip to readers_fast is
+ * that new readers might fail to see the results of this writer's
+ * critical section.
+ *
+ * Therefore we force it through the slow path which guarantees an
+ * acquire and thereby guarantees the critical section's consistency.
*/
- rcu_sync_exit(&brw->rss);
+ smp_store_release(&sem->readers_block, 0);
+
+ /*
+ * Release the write lock, this will allow readers back in the game.
+ */
+ up_write(&sem->rw_sem);
+
+ /*
+ * Once this completes (at least one RCU-sched grace period hence) the
+ * reader fast path will be available again. Safe to use outside the
+ * exclusive write lock because its counting.
+ */
+ rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8a99abf..e3b5520 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -70,11 +70,14 @@
static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
- int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
- qstat_inc(qstat_pv_lock_stealing, ret);
- return ret;
+ if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+ (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ qstat_inc(qstat_pv_lock_stealing, true);
+ return true;
+ }
+
+ return false;
}
/*
@@ -257,7 +260,6 @@
static inline bool
pv_wait_early(struct pv_node *prev, int loop)
{
-
if ((loop & PV_PREV_CHECK_MASK) != 0)
return false;
@@ -286,12 +288,10 @@
{
struct pv_node *pn = (struct pv_node *)node;
struct pv_node *pp = (struct pv_node *)prev;
- int waitcnt = 0;
int loop;
bool wait_early;
- /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
- for (;; waitcnt++) {
+ for (;;) {
for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
@@ -315,7 +315,6 @@
if (!READ_ONCE(node->locked)) {
qstat_inc(qstat_pv_wait_node, true);
- qstat_inc(qstat_pv_wait_again, waitcnt);
qstat_inc(qstat_pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
@@ -456,12 +455,9 @@
pv_wait(&l->locked, _Q_SLOW_VAL);
/*
- * The unlocker should have freed the lock before kicking the
- * CPU. So if the lock is still not free, it is a spurious
- * wakeup or another vCPU has stolen the lock. The current
- * vCPU should spin again.
+ * Because of lock stealing, the queue head vCPU may not be
+ * able to acquire the lock before it has to wait again.
*/
- qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
}
/*
@@ -544,7 +540,7 @@
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
return;
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index b9d0315..eb0a599 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -24,8 +24,8 @@
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
* pv_lock_slowpath - # of locking operations via the slowpath
* pv_lock_stealing - # of lock stealing operations
- * pv_spurious_wakeup - # of spurious wakeups
- * pv_wait_again - # of vCPU wait's that happened after a vCPU kick
+ * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
+ * pv_wait_again - # of wait's after a queue head vCPU kick
* pv_wait_early - # of early vCPU wait's
* pv_wait_head - # of vCPU wait's at the queue head
* pv_wait_node - # of vCPU wait's at a non-head queue node
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 447e08d..2337b4b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -121,16 +121,19 @@
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only marked woken if downgrading is false
*/
-static struct rw_semaphore *
-__rwsem_mark_wake(struct rw_semaphore *sem,
- enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
+static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ enum rwsem_wake_type wake_type,
+ struct wake_q_head *wake_q)
{
- struct rwsem_waiter *waiter;
- struct task_struct *tsk;
- struct list_head *next;
- long oldcount, woken, loop, adjustment;
+ struct rwsem_waiter *waiter, *tmp;
+ long oldcount, woken = 0, adjustment = 0;
- waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+ /*
+ * Take a peek at the queue head waiter such that we can determine
+ * the wakeup(s) to perform.
+ */
+ waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
+
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY) {
/*
@@ -142,19 +145,19 @@
*/
wake_q_add(wake_q, waiter->task);
}
- goto out;
+
+ return;
}
- /* Writers might steal the lock before we grant it to the next reader.
+ /*
+ * Writers might steal the lock before we grant it to the next reader.
* We prefer to do the first reader grant before counting readers
* so we can bail out early if a writer stole the lock.
*/
- adjustment = 0;
if (wake_type != RWSEM_WAKE_READ_OWNED) {
adjustment = RWSEM_ACTIVE_READ_BIAS;
try_reader_grant:
oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-
if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
/*
* If the count is still less than RWSEM_WAITING_BIAS
@@ -164,7 +167,8 @@
*/
if (atomic_long_add_return(-adjustment, &sem->count) <
RWSEM_WAITING_BIAS)
- goto out;
+ return;
+
/* Last active locker left. Retry waking readers. */
goto try_reader_grant;
}
@@ -176,38 +180,23 @@
rwsem_set_reader_owned(sem);
}
- /* Grant an infinite number of read locks to the readers at the front
- * of the queue. Note we increment the 'active part' of the count by
- * the number of readers before waking any processes up.
+ /*
+ * Grant an infinite number of read locks to the readers at the front
+ * of the queue. We know that woken will be at least 1 as we accounted
+ * for above. Note we increment the 'active part' of the count by the
+ * number of readers before waking any processes up.
*/
- woken = 0;
- do {
- woken++;
+ list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+ struct task_struct *tsk;
- if (waiter->list.next == &sem->wait_list)
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
break;
- waiter = list_entry(waiter->list.next,
- struct rwsem_waiter, list);
-
- } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
- if (waiter->type != RWSEM_WAITING_FOR_WRITE)
- /* hit end of list above */
- adjustment -= RWSEM_WAITING_BIAS;
-
- if (adjustment)
- atomic_long_add(adjustment, &sem->count);
-
- next = sem->wait_list.next;
- loop = woken;
- do {
- waiter = list_entry(next, struct rwsem_waiter, list);
- next = waiter->list.next;
+ woken++;
tsk = waiter->task;
wake_q_add(wake_q, tsk);
+ list_del(&waiter->list);
/*
* Ensure that the last operation is setting the reader
* waiter to nil such that rwsem_down_read_failed() cannot
@@ -215,13 +204,16 @@
* to the task to wakeup.
*/
smp_store_release(&waiter->task, NULL);
- } while (--loop);
+ }
- sem->wait_list.next = next;
- next->prev = &sem->wait_list;
+ adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+ if (list_empty(&sem->wait_list)) {
+ /* hit end of list above */
+ adjustment -= RWSEM_WAITING_BIAS;
+ }
- out:
- return sem;
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
}
/*
@@ -235,7 +227,6 @@
struct task_struct *tsk = current;
WAKE_Q(wake_q);
- /* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
@@ -247,7 +238,8 @@
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
- /* If there are no active locks, wake the front queued process(es).
+ /*
+ * If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
@@ -255,7 +247,7 @@
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
- sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
@@ -505,7 +497,7 @@
if (count > RWSEM_WAITING_BIAS) {
WAKE_Q(wake_q);
- sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
/*
* The wakeup is normally called _after_ the wait_lock
* is released, but given that we are proactively waking
@@ -614,9 +606,8 @@
raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:
- /* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
wake_up_q(&wake_q);
@@ -638,9 +629,8 @@
raw_spin_lock_irqsave(&sem->wait_lock, flags);
- /* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
wake_up_q(&wake_q);
diff --git a/kernel/padata.c b/kernel/padata.c
index 9932788..7848f05 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/rcupdate.h>
+#include <linux/module.h>
#define MAX_OBJ_NUM 1000
@@ -769,52 +770,43 @@
cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
}
-
-static int padata_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- int err;
struct padata_instance *pinst;
- int cpu = (unsigned long)hcpu;
+ int ret;
- pinst = container_of(nfb, struct padata_instance, cpu_notifier);
+ pinst = hlist_entry_safe(node, struct padata_instance, node);
+ if (!pinst_has_cpu(pinst, cpu))
+ return 0;
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- if (!pinst_has_cpu(pinst, cpu))
- break;
- mutex_lock(&pinst->lock);
- err = __padata_add_cpu(pinst, cpu);
- mutex_unlock(&pinst->lock);
- if (err)
- return notifier_from_errno(err);
- break;
-
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- if (!pinst_has_cpu(pinst, cpu))
- break;
- mutex_lock(&pinst->lock);
- err = __padata_remove_cpu(pinst, cpu);
- mutex_unlock(&pinst->lock);
- if (err)
- return notifier_from_errno(err);
- break;
- }
-
- return NOTIFY_OK;
+ mutex_lock(&pinst->lock);
+ ret = __padata_add_cpu(pinst, cpu);
+ mutex_unlock(&pinst->lock);
+ return ret;
}
+
+static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
+{
+ struct padata_instance *pinst;
+ int ret;
+
+ pinst = hlist_entry_safe(node, struct padata_instance, node);
+ if (!pinst_has_cpu(pinst, cpu))
+ return 0;
+
+ mutex_lock(&pinst->lock);
+ ret = __padata_remove_cpu(pinst, cpu);
+ mutex_unlock(&pinst->lock);
+ return ret;
+}
+
+static enum cpuhp_state hp_online;
#endif
static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
- unregister_hotcpu_notifier(&pinst->cpu_notifier);
+ cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
#endif
padata_stop(pinst);
@@ -1012,11 +1004,8 @@
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- pinst->cpu_notifier.notifier_call = padata_cpu_callback;
- pinst->cpu_notifier.priority = 0;
- register_hotcpu_notifier(&pinst->cpu_notifier);
+ cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
#endif
-
return pinst;
err_free_masks:
@@ -1039,3 +1028,26 @@
kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static __init int padata_driver_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
+ padata_cpu_online,
+ padata_cpu_prep_down);
+ if (ret < 0)
+ return ret;
+ hp_online = ret;
+ return 0;
+}
+module_init(padata_driver_init);
+
+static __exit void padata_driver_exit(void)
+{
+ cpuhp_remove_multi_state(hp_online);
+}
+module_exit(padata_driver_exit);
+#endif
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 68d3ebc..e8517b6 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -186,7 +186,7 @@
config DPM_WATCHDOG
bool "Device suspend/resume watchdog"
- depends on PM_DEBUG && PSTORE
+ depends on PM_DEBUG && PSTORE && EXPERT
---help---
Sets up a watchdog timer to capture drivers that are
locked up attempting to suspend/resume a device.
@@ -197,7 +197,7 @@
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
- default 60
+ default 120
depends on DPM_WATCHDOG
config PM_TRACE
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 33c79b6..b26dbc4 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -306,8 +306,10 @@
if (error)
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
error);
- if (!in_suspend)
+ if (!in_suspend) {
events_check_enabled = false;
+ clear_free_pages();
+ }
platform_leave(platform_mode);
@@ -1189,22 +1191,6 @@
return 1;
}
-static int __init page_poison_nohibernate_setup(char *str)
-{
-#ifdef CONFIG_PAGE_POISONING_ZERO
- /*
- * The zeroing option for page poison skips the checks on alloc.
- * since hibernation doesn't save free pages there's no way to
- * guarantee the pages will still be zeroed.
- */
- if (!strcmp(str, "on")) {
- pr_info("Disabling hibernation due to page poisoning\n");
- return nohibernate_setup(str);
- }
-#endif
- return 1;
-}
-
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
@@ -1212,4 +1198,3 @@
__setup("resumewait", resumewait_setup);
__setup("resumedelay=", resumedelay_setup);
__setup("nohibernate", nohibernate_setup);
-__setup("page_poison=", page_poison_nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 5ea50b1..281a697 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -644,6 +644,7 @@
return error;
hibernate_image_size_init();
hibernate_reserved_size_init();
+ pm_states_init();
power_kobj = kobject_create_and_add("power", NULL);
if (!power_kobj)
return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 242d8b8..56d1d0d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -110,6 +110,8 @@
extern void free_basic_memory_bitmaps(void);
extern int hibernate_preallocate_memory(void);
+extern void clear_free_pages(void);
+
/**
* Auxiliary structure used for reading the snapshot image data and
* metadata from and writing them to the list of page backup entries
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b022284..4f0f060 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1132,6 +1132,28 @@
pr_debug("PM: Basic memory bitmaps freed\n");
}
+void clear_free_pages(void)
+{
+#ifdef CONFIG_PAGE_POISONING_ZERO
+ struct memory_bitmap *bm = free_pages_map;
+ unsigned long pfn;
+
+ if (WARN_ON(!(free_pages_map)))
+ return;
+
+ memory_bm_position_reset(bm);
+ pfn = memory_bm_next_pfn(bm);
+ while (pfn != BM_END_OF_MAP) {
+ if (pfn_valid(pfn))
+ clear_highpage(pfn_to_page(pfn));
+
+ pfn = memory_bm_next_pfn(bm);
+ }
+ memory_bm_position_reset(bm);
+ pr_info("PM: free pages cleared after restore\n");
+#endif /* PAGE_POISONING_ZERO */
+}
+
/**
* snapshot_additional_pages - Estimate the number of extra pages needed.
* @zone: Memory zone to carry out the computation for.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 0acab9d..1e7f5da 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -118,10 +118,18 @@
*/
static bool relative_states;
+void __init pm_states_init(void)
+{
+ /*
+ * freeze state should be supported even without any suspend_ops,
+ * initialize pm_states accordingly here
+ */
+ pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
+}
+
static int __init sleep_states_setup(char *str)
{
relative_states = !strncmp(str, "1", 1);
- pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
return 1;
}
@@ -211,7 +219,7 @@
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin)
return freeze_ops->begin();
- else if (suspend_ops->begin)
+ else if (suspend_ops && suspend_ops->begin)
return suspend_ops->begin(state);
else
return 0;
@@ -221,7 +229,7 @@
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
freeze_ops->end();
- else if (suspend_ops->end)
+ else if (suspend_ops && suspend_ops->end)
suspend_ops->end();
}
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index d38ab08..123ccbd 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -52,7 +52,7 @@
#define PERF_FLAG "-perf:"
#define PERFOUT_STRING(s) \
- pr_alert("%s" PERF_FLAG s "\n", perf_type)
+ pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
#define VERBOSE_PERFOUT_STRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
#define VERBOSE_PERFOUT_ERRSTRING(s) \
@@ -400,9 +400,8 @@
sp.sched_priority = 0;
sched_setscheduler_nocheck(current,
SCHED_NORMAL, &sp);
- pr_alert("%s" PERF_FLAG
- "rcu_perf_writer %ld has %d measurements\n",
- perf_type, me, MIN_MEAS);
+ pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
+ perf_type, PERF_FLAG, me, MIN_MEAS);
if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
nrealwriters) {
schedule_timeout_interruptible(10);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 971e2b1..bf08fee 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1238,6 +1238,7 @@
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
static unsigned long rtcv_snap = ULONG_MAX;
+ struct task_struct *wtp;
for_each_possible_cpu(cpu) {
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -1258,8 +1259,9 @@
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free));
- pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
+ pr_cont("rtmbe: %d rtbe: %ld rtbke: %ld rtbre: %ld ",
atomic_read(&n_rcu_torture_mberror),
+ n_rcu_torture_barrier_error,
n_rcu_torture_boost_ktrerror,
n_rcu_torture_boost_rterror);
pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
@@ -1312,10 +1314,12 @@
rcutorture_get_gp_data(cur_ops->ttype,
&flags, &gpnum, &completed);
- pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+ wtp = READ_ONCE(writer_task);
+ pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
- gpnum, completed, flags);
+ gpnum, completed, flags,
+ wtp == NULL ? ~0UL : wtp->state);
show_rcu_gp_kthreads();
rcu_ftrace_dump(DUMP_ALL);
}
@@ -1362,12 +1366,12 @@
onoff_interval, onoff_holdoff);
}
-static void rcutorture_booster_cleanup(int cpu)
+static int rcutorture_booster_cleanup(unsigned int cpu)
{
struct task_struct *t;
if (boost_tasks[cpu] == NULL)
- return;
+ return 0;
mutex_lock(&boost_mutex);
t = boost_tasks[cpu];
boost_tasks[cpu] = NULL;
@@ -1375,9 +1379,10 @@
/* This must be outside of the mutex, otherwise deadlock! */
torture_stop_kthread(rcu_torture_boost, t);
+ return 0;
}
-static int rcutorture_booster_init(int cpu)
+static int rcutorture_booster_init(unsigned int cpu)
{
int retval;
@@ -1577,28 +1582,7 @@
}
}
-static int rcutorture_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- (void)rcutorture_booster_init(cpu);
- break;
- case CPU_DOWN_PREPARE:
- rcutorture_booster_cleanup(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block rcutorture_cpu_nb = {
- .notifier_call = rcutorture_cpu_notify,
-};
+static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
@@ -1638,11 +1622,8 @@
for (i = 0; i < ncbflooders; i++)
torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
if ((test_boost == 1 && cur_ops->can_boost) ||
- test_boost == 2) {
- unregister_cpu_notifier(&rcutorture_cpu_nb);
- for_each_possible_cpu(i)
- rcutorture_booster_cleanup(i);
- }
+ test_boost == 2)
+ cpuhp_remove_state(rcutor_hp);
/*
* Wait for all RCU callbacks to fire, then do flavor-specific
@@ -1869,14 +1850,13 @@
test_boost == 2) {
boost_starttime = jiffies + test_boost_interval * HZ;
- register_cpu_notifier(&rcutorture_cpu_nb);
- for_each_possible_cpu(i) {
- if (cpu_is_offline(i))
- continue; /* Heuristic: CPU can go offline. */
- firsterr = rcutorture_booster_init(i);
- if (firsterr)
- goto unwind;
- }
+
+ firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
+ rcutorture_booster_init,
+ rcutorture_booster_cleanup);
+ if (firsterr < 0)
+ goto unwind;
+ rcutor_hp = firsterr;
}
firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
if (firsterr)
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index be922c9..50d1861 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -68,6 +68,8 @@
RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
"suspicious rcu_sync_is_idle() usage");
}
+
+EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
#endif
/**
@@ -83,6 +85,18 @@
}
/**
+ * Must be called after rcu_sync_init() and before first use.
+ *
+ * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
+ * pairs turn into NO-OPs.
+ */
+void rcu_sync_enter_start(struct rcu_sync *rsp)
+{
+ rsp->gp_count++;
+ rsp->gp_state = GP_PASSED;
+}
+
+/**
* rcu_sync_enter() - Force readers onto slowpath
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5d80925..7e2e038 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -41,7 +41,6 @@
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
-#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
@@ -60,7 +59,6 @@
#include "tree.h"
#include "rcu.h"
-MODULE_ALIAS("rcutree");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
@@ -1848,6 +1846,7 @@
struct rcu_data *rdp)
{
bool ret;
+ bool need_gp;
/* Handle the ends of any preceding grace periods first. */
if (rdp->completed == rnp->completed &&
@@ -1874,9 +1873,10 @@
*/
rdp->gpnum = rnp->gpnum;
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
- rdp->cpu_no_qs.b.norm = true;
+ need_gp = !!(rnp->qsmask & rdp->grpmask);
+ rdp->cpu_no_qs.b.norm = need_gp;
rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
- rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
+ rdp->core_needs_qs = need_gp;
zero_cpu_stall_ticks(rdp);
WRITE_ONCE(rdp->gpwrap, false);
}
@@ -2344,7 +2344,7 @@
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
- swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
+ rcu_gp_kthread_wake(rsp);
}
/*
@@ -2970,7 +2970,7 @@
}
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
- swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
+ rcu_gp_kthread_wake(rsp);
}
/*
@@ -3792,8 +3792,6 @@
rnp = rdp->mynode;
mask = rdp->grpmask;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
- rnp->qsmaskinitnext |= mask;
- rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
rdp->beenonline = true; /* We have now been online. */
@@ -3860,6 +3858,32 @@
return 0;
}
+/*
+ * Mark the specified CPU as being online so that subsequent grace periods
+ * (both expedited and normal) will wait on it. Note that this means that
+ * incoming CPUs are not allowed to use RCU read-side critical sections
+ * until this function is called. Failing to observe this restriction
+ * will result in lockdep splats.
+ */
+void rcu_cpu_starting(unsigned int cpu)
+{
+ unsigned long flags;
+ unsigned long mask;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ rdp = this_cpu_ptr(rsp->rda);
+ rnp = rdp->mynode;
+ mask = rdp->grpmask;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->qsmaskinitnext |= mask;
+ rnp->expmaskinitnext |= mask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/*
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@ -4209,8 +4233,10 @@
* or the scheduler are operational.
*/
pm_notifier(rcu_pm_notify, 0);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
rcutree_prepare_cpu(cpu);
+ rcu_cpu_starting(cpu);
+ }
}
#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f714f87..e99a523 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -400,6 +400,7 @@
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+ atomic_long_t exp_workdone0; /* # done by workqueue. */
atomic_long_t exp_workdone1; /* # done by others #1. */
atomic_long_t exp_workdone2; /* # done by others #2. */
atomic_long_t exp_workdone3; /* # done by others #3. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d86ab6..24343eb 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -359,7 +359,8 @@
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
if (raw_smp_processor_id() == cpu ||
- !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+ !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
+ !(rnp->qsmaskinitnext & rdp->grpmask))
mask_ofl_test |= rdp->grpmask;
}
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
@@ -384,17 +385,16 @@
mask_ofl_ipi &= ~mask;
continue;
}
- /* Failed, raced with offline. */
+ /* Failed, raced with CPU hotplug operation. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- if (cpu_online(cpu) &&
+ if ((rnp->qsmaskinitnext & mask) &&
(rnp->expmask & mask)) {
+ /* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
schedule_timeout_uninterruptible(1);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask))
- goto retry_ipi;
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ goto retry_ipi;
}
+ /* CPU really is offline, so we can ignore it. */
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -427,12 +427,10 @@
jiffies_stall);
if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
return;
- if (ret < 0) {
- /* Hit a signal, disable CPU stall warnings. */
- swait_event(rsp->expedited_wq,
- sync_rcu_preempt_exp_done(rnp_root));
- return;
- }
+ WARN_ON(ret < 0); /* workqueues should not be signaled. */
+ if (rcu_cpu_stall_suppress)
+ continue;
+ panic_on_rcu_stall();
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
ndetected = 0;
@@ -500,7 +498,6 @@
* next GP, to proceed.
*/
mutex_lock(&rsp->exp_wake_mutex);
- mutex_unlock(&rsp->exp_mutex);
rcu_for_each_node_breadth_first(rsp, rnp) {
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
@@ -516,6 +513,70 @@
mutex_unlock(&rsp->exp_wake_mutex);
}
+/* Let the workqueue handler know what it is supposed to do. */
+struct rcu_exp_work {
+ smp_call_func_t rew_func;
+ struct rcu_state *rew_rsp;
+ unsigned long rew_s;
+ struct work_struct rew_work;
+};
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+ struct rcu_exp_work *rewp;
+
+ /* Initialize the rcu_node tree in preparation for the wait. */
+ rewp = container_of(wp, struct rcu_exp_work, rew_work);
+ sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);
+
+ /* Wait and clean up, including waking everyone. */
+ rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
+}
+
+/*
+ * Given an rcu_state pointer and a smp_call_function() handler, kick
+ * off the specified flavor of expedited grace period.
+ */
+static void _synchronize_rcu_expedited(struct rcu_state *rsp,
+ smp_call_func_t func)
+{
+ struct rcu_data *rdp;
+ struct rcu_exp_work rew;
+ struct rcu_node *rnp;
+ unsigned long s;
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(rsp->call);
+ return;
+ }
+
+ /* Take a snapshot of the sequence number. */
+ s = rcu_exp_gp_seq_snap(rsp);
+ if (exp_funnel_lock(rsp, s))
+ return; /* Someone else did our work for us. */
+
+ /* Marshall arguments and schedule the expedited grace period. */
+ rew.rew_func = func;
+ rew.rew_rsp = rsp;
+ rew.rew_s = s;
+ INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
+ schedule_work(&rew.rew_work);
+
+ /* Wait for expedited grace period to complete. */
+ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+ rnp = rcu_get_root(rsp);
+ wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+ sync_exp_work_done(rsp,
+ &rdp->exp_workdone0, s));
+
+ /* Let the next expedited grace period start. */
+ mutex_unlock(&rsp->exp_mutex);
+}
+
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
@@ -534,29 +595,13 @@
*/
void synchronize_sched_expedited(void)
{
- unsigned long s;
struct rcu_state *rsp = &rcu_sched_state;
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
- /* If expedited grace periods are prohibited, fall back to normal. */
- if (rcu_gp_is_normal()) {
- wait_rcu_gp(call_rcu_sched);
- return;
- }
-
- /* Take a snapshot of the sequence number. */
- s = rcu_exp_gp_seq_snap(rsp);
- if (exp_funnel_lock(rsp, s))
- return; /* Someone else did our work for us. */
-
- /* Initialize the rcu_node tree in preparation for the wait. */
- sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-
- /* Wait and clean up, including waking everyone. */
- rcu_exp_wait_wake(rsp, s);
+ _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
@@ -620,23 +665,8 @@
void synchronize_rcu_expedited(void)
{
struct rcu_state *rsp = rcu_state_p;
- unsigned long s;
- /* If expedited grace periods are prohibited, fall back to normal. */
- if (rcu_gp_is_normal()) {
- wait_rcu_gp(call_rcu);
- return;
- }
-
- s = rcu_exp_gp_seq_snap(rsp);
- if (exp_funnel_lock(rsp, s))
- return; /* Someone else did our work for us. */
-
- /* Initialize the rcu_node tree in preparation for the wait. */
- sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
-
- /* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
- rcu_exp_wait_wake(rsp, s);
+ _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0082fce..85c5a88 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2173,6 +2173,7 @@
cl++;
c++;
local_bh_enable();
+ cond_resched_rcu_qs();
list = next;
}
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 86782f9a..b1f2897 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,16 +185,17 @@
int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp;
- unsigned long s1 = 0, s2 = 0, s3 = 0;
+ unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
+ s0 += atomic_long_read(&rdp->exp_workdone0);
s1 += atomic_long_read(&rdp->exp_workdone1);
s2 += atomic_long_read(&rdp->exp_workdone2);
s3 += atomic_long_read(&rdp->exp_workdone3);
}
- seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence, s1, s2, s3,
+ seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+ rsp->expedited_sequence, s0, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index f0d8322..f19271d 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -46,7 +46,7 @@
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/tick.h>
@@ -54,7 +54,6 @@
#include "rcu.h"
-MODULE_ALIAS("rcupdate");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
diff --git a/kernel/relay.c b/kernel/relay.c
index d797502..fc9b4a4 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -214,7 +214,7 @@
__free_page(buf->page_array[i]);
relay_free_page_array(buf->page_array);
}
- chan->buf[buf->cpu] = NULL;
+ *per_cpu_ptr(chan->buf, buf->cpu) = NULL;
kfree(buf->padding);
kfree(buf);
kref_put(&chan->kref, relay_destroy_channel);
@@ -382,20 +382,21 @@
*/
void relay_reset(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
- if (chan->is_global && chan->buf[0]) {
- __relay_reset(chan->buf[0], 0);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+ __relay_reset(buf, 0);
return;
}
mutex_lock(&relay_channels_mutex);
for_each_possible_cpu(i)
- if (chan->buf[i])
- __relay_reset(chan->buf[i], 0);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ __relay_reset(buf, 0);
mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_reset);
@@ -440,7 +441,7 @@
struct dentry *dentry;
if (chan->is_global)
- return chan->buf[0];
+ return *per_cpu_ptr(chan->buf, 0);
buf = relay_create_buf(chan);
if (!buf)
@@ -464,7 +465,7 @@
__relay_reset(buf, 1);
if(chan->is_global) {
- chan->buf[0] = buf;
+ *per_cpu_ptr(chan->buf, 0) = buf;
buf->cpu = 0;
}
@@ -512,46 +513,25 @@
chan->cb = cb;
}
-/**
- * relay_hotcpu_callback - CPU hotplug callback
- * @nb: notifier block
- * @action: hotplug action to take
- * @hcpu: CPU number
- *
- * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
- */
-static int relay_hotcpu_callback(struct notifier_block *nb,
- unsigned long action,
- void *hcpu)
+int relay_prepare_cpu(unsigned int cpu)
{
- unsigned int hotcpu = (unsigned long)hcpu;
struct rchan *chan;
+ struct rchan_buf *buf;
- switch(action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&relay_channels_mutex);
- list_for_each_entry(chan, &relay_channels, list) {
- if (chan->buf[hotcpu])
- continue;
- chan->buf[hotcpu] = relay_open_buf(chan, hotcpu);
- if(!chan->buf[hotcpu]) {
- printk(KERN_ERR
- "relay_hotcpu_callback: cpu %d buffer "
- "creation failed\n", hotcpu);
- mutex_unlock(&relay_channels_mutex);
- return notifier_from_errno(-ENOMEM);
- }
+ mutex_lock(&relay_channels_mutex);
+ list_for_each_entry(chan, &relay_channels, list) {
+ if ((buf = *per_cpu_ptr(chan->buf, cpu)))
+ continue;
+ buf = relay_open_buf(chan, cpu);
+ if (!buf) {
+ pr_err("relay: cpu %d buffer creation failed\n", cpu);
+ mutex_unlock(&relay_channels_mutex);
+ return -ENOMEM;
}
- mutex_unlock(&relay_channels_mutex);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /* No need to flush the cpu : will be flushed upon
- * final relay_flush() call. */
- break;
+ *per_cpu_ptr(chan->buf, cpu) = buf;
}
- return NOTIFY_OK;
+ mutex_unlock(&relay_channels_mutex);
+ return 0;
}
/**
@@ -583,6 +563,7 @@
{
unsigned int i;
struct rchan *chan;
+ struct rchan_buf *buf;
if (!(subbuf_size && n_subbufs))
return NULL;
@@ -593,6 +574,7 @@
if (!chan)
return NULL;
+ chan->buf = alloc_percpu(struct rchan_buf *);
chan->version = RELAYFS_CHANNEL_VERSION;
chan->n_subbufs = n_subbufs;
chan->subbuf_size = subbuf_size;
@@ -608,9 +590,10 @@
mutex_lock(&relay_channels_mutex);
for_each_online_cpu(i) {
- chan->buf[i] = relay_open_buf(chan, i);
- if (!chan->buf[i])
+ buf = relay_open_buf(chan, i);
+ if (!buf)
goto free_bufs;
+ *per_cpu_ptr(chan->buf, i) = buf;
}
list_add(&chan->list, &relay_channels);
mutex_unlock(&relay_channels_mutex);
@@ -619,8 +602,8 @@
free_bufs:
for_each_possible_cpu(i) {
- if (chan->buf[i])
- relay_close_buf(chan->buf[i]);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_close_buf(buf);
}
kref_put(&chan->kref, relay_destroy_channel);
@@ -666,6 +649,7 @@
unsigned int i, curr_cpu;
unsigned long flags;
struct dentry *dentry;
+ struct rchan_buf *buf;
struct rchan_percpu_buf_dispatcher disp;
if (!chan || !base_filename)
@@ -684,10 +668,11 @@
if (chan->is_global) {
err = -EINVAL;
- if (!WARN_ON_ONCE(!chan->buf[0])) {
- dentry = relay_create_buf_file(chan, chan->buf[0], 0);
+ buf = *per_cpu_ptr(chan->buf, 0);
+ if (!WARN_ON_ONCE(!buf)) {
+ dentry = relay_create_buf_file(chan, buf, 0);
if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
- relay_set_buf_dentry(chan->buf[0], dentry);
+ relay_set_buf_dentry(buf, dentry);
err = 0;
}
}
@@ -702,13 +687,14 @@
* on all currently online CPUs.
*/
for_each_online_cpu(i) {
- if (unlikely(!chan->buf[i])) {
+ buf = *per_cpu_ptr(chan->buf, i);
+ if (unlikely(!buf)) {
WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
err = -EINVAL;
break;
}
- dentry = relay_create_buf_file(chan, chan->buf[i], i);
+ dentry = relay_create_buf_file(chan, buf, i);
if (unlikely(!dentry)) {
err = -EINVAL;
break;
@@ -716,10 +702,10 @@
if (curr_cpu == i) {
local_irq_save(flags);
- relay_set_buf_dentry(chan->buf[i], dentry);
+ relay_set_buf_dentry(buf, dentry);
local_irq_restore(flags);
} else {
- disp.buf = chan->buf[i];
+ disp.buf = buf;
disp.dentry = dentry;
smp_mb();
/* relay_channels_mutex must be held, so wait. */
@@ -822,11 +808,10 @@
if (!chan)
return;
- if (cpu >= NR_CPUS || !chan->buf[cpu] ||
- subbufs_consumed > chan->n_subbufs)
+ buf = *per_cpu_ptr(chan->buf, cpu);
+ if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs)
return;
- buf = chan->buf[cpu];
if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
buf->subbufs_consumed = buf->subbufs_produced;
else
@@ -842,18 +827,19 @@
*/
void relay_close(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
mutex_lock(&relay_channels_mutex);
- if (chan->is_global && chan->buf[0])
- relay_close_buf(chan->buf[0]);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0)))
+ relay_close_buf(buf);
else
for_each_possible_cpu(i)
- if (chan->buf[i])
- relay_close_buf(chan->buf[i]);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_close_buf(buf);
if (chan->last_toobig)
printk(KERN_WARNING "relay: one or more items not logged "
@@ -874,20 +860,21 @@
*/
void relay_flush(struct rchan *chan)
{
+ struct rchan_buf *buf;
unsigned int i;
if (!chan)
return;
- if (chan->is_global && chan->buf[0]) {
- relay_switch_subbuf(chan->buf[0], 0);
+ if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+ relay_switch_subbuf(buf, 0);
return;
}
mutex_lock(&relay_channels_mutex);
for_each_possible_cpu(i)
- if (chan->buf[i])
- relay_switch_subbuf(chan->buf[i], 0);
+ if ((buf = *per_cpu_ptr(chan->buf, i)))
+ relay_switch_subbuf(buf, 0);
mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_flush);
@@ -1377,12 +1364,3 @@
.splice_read = relay_file_splice_read,
};
EXPORT_SYMBOL_GPL(relay_file_operations);
-
-static __init int relay_init(void)
-{
-
- hotcpu_notifier(relay_hotcpu_callback, 0);
- return 0;
-}
-
-early_initcall(relay_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44817c6..94732d1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -581,6 +581,8 @@
* If needed we can still optimize that later with an
* empty IRQ.
*/
+ if (cpu_is_offline(cpu))
+ return true; /* Don't try to wake offline CPUs. */
if (tick_nohz_full_cpu(cpu)) {
if (cpu != smp_processor_id() ||
tick_nohz_tick_stopped())
@@ -591,6 +593,11 @@
return false;
}
+/*
+ * Wake up the specified CPU. If the CPU is going offline, it is the
+ * caller's responsibility to deal with the lost wakeup, for example,
+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+ */
void wake_up_nohz_cpu(int cpu)
{
if (!wake_up_full_nohz_cpu(cpu))
@@ -1063,8 +1070,12 @@
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
* we're holding p->pi_lock.
*/
- if (task_rq(p) == rq && task_on_rq_queued(p))
- rq = __migrate_task(rq, p, arg->dest_cpu);
+ if (task_rq(p) == rq) {
+ if (task_on_rq_queued(p))
+ rq = __migrate_task(rq, p, arg->dest_cpu);
+ else
+ p->wake_cpu = arg->dest_cpu;
+ }
raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
@@ -1105,10 +1116,10 @@
p->sched_class->set_cpus_allowed(p, new_mask);
- if (running)
- p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE);
+ if (running)
+ set_curr_task(rq, p);
}
/*
@@ -1265,7 +1276,7 @@
/*
* Task isn't running anymore; make it appear like we migrated
* it before it went to sleep. This means on wakeup we make the
- * previous cpu our targer instead of where it really is.
+ * previous cpu our target instead of where it really is.
*/
p->wake_cpu = cpu;
}
@@ -1629,23 +1640,25 @@
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
{
-#ifdef CONFIG_SCHEDSTATS
- struct rq *rq = this_rq();
+ struct rq *rq;
+
+ if (!schedstat_enabled())
+ return;
+
+ rq = this_rq();
#ifdef CONFIG_SMP
- int this_cpu = smp_processor_id();
-
- if (cpu == this_cpu) {
- schedstat_inc(rq, ttwu_local);
- schedstat_inc(p, se.statistics.nr_wakeups_local);
+ if (cpu == rq->cpu) {
+ schedstat_inc(rq->ttwu_local);
+ schedstat_inc(p->se.statistics.nr_wakeups_local);
} else {
struct sched_domain *sd;
- schedstat_inc(p, se.statistics.nr_wakeups_remote);
+ schedstat_inc(p->se.statistics.nr_wakeups_remote);
rcu_read_lock();
- for_each_domain(this_cpu, sd) {
+ for_each_domain(rq->cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
- schedstat_inc(sd, ttwu_wake_remote);
+ schedstat_inc(sd->ttwu_wake_remote);
break;
}
}
@@ -1653,17 +1666,14 @@
}
if (wake_flags & WF_MIGRATED)
- schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-
+ schedstat_inc(p->se.statistics.nr_wakeups_migrate);
#endif /* CONFIG_SMP */
- schedstat_inc(rq, ttwu_count);
- schedstat_inc(p, se.statistics.nr_wakeups);
+ schedstat_inc(rq->ttwu_count);
+ schedstat_inc(p->se.statistics.nr_wakeups);
if (wake_flags & WF_SYNC)
- schedstat_inc(p, se.statistics.nr_wakeups_sync);
-
-#endif /* CONFIG_SCHEDSTATS */
+ schedstat_inc(p->se.statistics.nr_wakeups_sync);
}
static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2084,8 +2094,7 @@
ttwu_queue(p, cpu, wake_flags);
stat:
- if (schedstat_enabled())
- ttwu_stat(p, cpu, wake_flags);
+ ttwu_stat(p, cpu, wake_flags);
out:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -2095,6 +2104,7 @@
/**
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
+ * @cookie: context's cookie for pinning
*
* Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
@@ -2133,8 +2143,7 @@
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
ttwu_do_wakeup(rq, p, 0, cookie);
- if (schedstat_enabled())
- ttwu_stat(p, smp_processor_id(), 0);
+ ttwu_stat(p, smp_processor_id(), 0);
out:
raw_spin_unlock(&p->pi_lock);
}
@@ -2772,6 +2781,10 @@
* task and put them back on the free list.
*/
kprobe_flush_task(prev);
+
+ /* Task is done with its stack. */
+ put_task_stack(prev);
+
put_task_struct(prev);
}
@@ -3192,6 +3205,9 @@
*/
static noinline void __schedule_bug(struct task_struct *prev)
{
+ /* Save this before calling printk(), since that will clobber it */
+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
+
if (oops_in_progress)
return;
@@ -3202,13 +3218,12 @@
print_modules();
if (irqs_disabled())
print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (in_atomic_preempt_off()) {
+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+ && in_atomic_preempt_off()) {
pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
+ print_ip_sym(preempt_disable_ip);
pr_cont("\n");
}
-#endif
if (panic_on_warn)
panic("scheduling while atomic\n");
@@ -3234,7 +3249,7 @@
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
- schedstat_inc(this_rq(), sched_count);
+ schedstat_inc(this_rq()->sched_count);
}
/*
@@ -3327,17 +3342,6 @@
rq = cpu_rq(cpu);
prev = rq->curr;
- /*
- * do_exit() calls schedule() with preemption disabled as an exception;
- * however we must fix that up, otherwise the next task will see an
- * inconsistent (higher) preempt count.
- *
- * It also avoids the below schedule_debug() test from complaining
- * about this.
- */
- if (unlikely(prev->state == TASK_DEAD))
- preempt_enable_no_resched_notrace();
-
schedule_debug(prev);
if (sched_feat(HRTICK))
@@ -3403,7 +3407,33 @@
balance_callback(rq);
}
-STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
+
+void __noreturn do_task_dead(void)
+{
+ /*
+ * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+ * when the following two conditions become true.
+ * - There is race condition of mmap_sem (It is acquired by
+ * exit_mm()), and
+ * - SMI occurs before setting TASK_RUNINNG.
+ * (or hypervisor of virtual machine switches to other guest)
+ * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+ *
+ * To avoid it, we have to wait for releasing tsk->pi_lock which
+ * is held by try_to_wake_up()
+ */
+ smp_mb();
+ raw_spin_unlock_wait(¤t->pi_lock);
+
+ /* causes final put_task_struct in finish_task_switch(). */
+ __set_current_state(TASK_DEAD);
+ current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
+ __schedule(false);
+ BUG();
+ /* Avoid "noreturn function does return". */
+ for (;;)
+ cpu_relax(); /* For when BUG is null */
+}
static inline void sched_submit_work(struct task_struct *tsk)
{
@@ -3687,10 +3717,10 @@
p->prio = prio;
- if (running)
- p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, queue_flag);
+ if (running)
+ set_curr_task(rq, p);
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
@@ -3704,7 +3734,8 @@
void set_user_nice(struct task_struct *p, long nice)
{
- int old_prio, delta, queued;
+ bool queued, running;
+ int old_prio, delta;
struct rq_flags rf;
struct rq *rq;
@@ -3726,8 +3757,11 @@
goto out_unlock;
}
queued = task_on_rq_queued(p);
+ running = task_current(rq, p);
if (queued)
dequeue_task(rq, p, DEQUEUE_SAVE);
+ if (running)
+ put_prev_task(rq, p);
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p);
@@ -3744,6 +3778,8 @@
if (delta < 0 || (delta > 0 && task_running(rq, p)))
resched_curr(rq);
}
+ if (running)
+ set_curr_task(rq, p);
out_unlock:
task_rq_unlock(rq, p, &rf);
}
@@ -4243,8 +4279,6 @@
prev_class = p->sched_class;
__setscheduler(rq, p, attr, pi);
- if (running)
- p->sched_class->set_curr_task(rq);
if (queued) {
/*
* We enqueue to tail when the priority of a task is
@@ -4255,6 +4289,8 @@
enqueue_task(rq, p, queue_flags);
}
+ if (running)
+ set_curr_task(rq, p);
check_class_changed(rq, p, prev_class, oldprio);
preempt_disable(); /* avoid rq from going away on us */
@@ -4846,7 +4882,7 @@
{
struct rq *rq = this_rq_lock();
- schedstat_inc(rq, yld_count);
+ schedstat_inc(rq->yld_count);
current->sched_class->yield_task(rq);
/*
@@ -4863,6 +4899,7 @@
return 0;
}
+#ifndef CONFIG_PREEMPT
int __sched _cond_resched(void)
{
if (should_resched(0)) {
@@ -4872,6 +4909,7 @@
return 0;
}
EXPORT_SYMBOL(_cond_resched);
+#endif
/*
* __cond_resched_lock() - if a reschedule is pending, drop the given lock,
@@ -4997,7 +5035,7 @@
yielded = curr->sched_class->yield_to_task(rq, p, preempt);
if (yielded) {
- schedstat_inc(rq, yld_count);
+ schedstat_inc(rq->yld_count);
/*
* Make p's CPU reschedule; pick_next_entity takes care of
* fairness.
@@ -5417,10 +5455,10 @@
p->numa_preferred_nid = nid;
- if (running)
- p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE);
+ if (running)
+ set_curr_task(rq, p);
task_rq_unlock(rq, p, &rf);
}
#endif /* CONFIG_NUMA_BALANCING */
@@ -5717,6 +5755,8 @@
}
}
#else /* !CONFIG_SCHED_DEBUG */
+
+# define sched_debug_enabled 0
# define sched_domain_debug(sd, cpu) do { } while (0)
static inline bool sched_debug(void)
{
@@ -5735,6 +5775,7 @@
SD_BALANCE_FORK |
SD_BALANCE_EXEC |
SD_SHARE_CPUCAPACITY |
+ SD_ASYM_CPUCAPACITY |
SD_SHARE_PKG_RESOURCES |
SD_SHARE_POWERDOMAIN)) {
if (sd->groups != sd->groups->next)
@@ -5765,6 +5806,7 @@
SD_BALANCE_NEWIDLE |
SD_BALANCE_FORK |
SD_BALANCE_EXEC |
+ SD_ASYM_CPUCAPACITY |
SD_SHARE_CPUCAPACITY |
SD_SHARE_PKG_RESOURCES |
SD_PREFER_SIBLING |
@@ -5909,10 +5951,8 @@
} while (sg != first);
}
-static void free_sched_domain(struct rcu_head *rcu)
+static void destroy_sched_domain(struct sched_domain *sd)
{
- struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-
/*
* If its an overlapping domain it has private groups, iterate and
* nuke them all.
@@ -5923,18 +5963,26 @@
kfree(sd->groups->sgc);
kfree(sd->groups);
}
+ if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
+ kfree(sd->shared);
kfree(sd);
}
-static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains_rcu(struct rcu_head *rcu)
{
- call_rcu(&sd->rcu, free_sched_domain);
+ struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+
+ while (sd) {
+ struct sched_domain *parent = sd->parent;
+ destroy_sched_domain(sd);
+ sd = parent;
+ }
}
-static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains(struct sched_domain *sd)
{
- for (; sd; sd = sd->parent)
- destroy_sched_domain(sd, cpu);
+ if (sd)
+ call_rcu(&sd->rcu, destroy_sched_domains_rcu);
}
/*
@@ -5949,14 +5997,14 @@
DEFINE_PER_CPU(struct sched_domain *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain *, sd_numa);
-DEFINE_PER_CPU(struct sched_domain *, sd_busy);
DEFINE_PER_CPU(struct sched_domain *, sd_asym);
static void update_top_cache_domain(int cpu)
{
+ struct sched_domain_shared *sds = NULL;
struct sched_domain *sd;
- struct sched_domain *busy_sd = NULL;
int id = cpu;
int size = 1;
@@ -5964,13 +6012,13 @@
if (sd) {
id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
- busy_sd = sd->parent; /* sd_busy */
+ sds = sd->shared;
}
- rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
per_cpu(sd_llc_size, cpu) = size;
per_cpu(sd_llc_id, cpu) = id;
+ rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
sd = lowest_flag_domain(cpu, SD_NUMA);
rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
@@ -6006,7 +6054,7 @@
*/
if (parent->flags & SD_PREFER_SIBLING)
tmp->flags |= SD_PREFER_SIBLING;
- destroy_sched_domain(parent, cpu);
+ destroy_sched_domain(parent);
} else
tmp = tmp->parent;
}
@@ -6014,7 +6062,7 @@
if (sd && sd_degenerate(sd)) {
tmp = sd;
sd = sd->parent;
- destroy_sched_domain(tmp, cpu);
+ destroy_sched_domain(tmp);
if (sd)
sd->child = NULL;
}
@@ -6024,7 +6072,7 @@
rq_attach_root(rq, rd);
tmp = rq->sd;
rcu_assign_pointer(rq->sd, sd);
- destroy_sched_domains(tmp, cpu);
+ destroy_sched_domains(tmp);
update_top_cache_domain(cpu);
}
@@ -6267,7 +6315,6 @@
return;
update_group_capacity(sd, cpu);
- atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
}
/*
@@ -6355,6 +6402,9 @@
WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
*per_cpu_ptr(sdd->sd, cpu) = NULL;
+ if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
+ *per_cpu_ptr(sdd->sds, cpu) = NULL;
+
if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
*per_cpu_ptr(sdd->sg, cpu) = NULL;
@@ -6374,26 +6424,37 @@
/*
* SD_flags allowed in topology descriptions.
*
- * SD_SHARE_CPUCAPACITY - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA - describes NUMA topologies
- * SD_SHARE_POWERDOMAIN - describes shared power domain
+ * These flags are purely descriptive of the topology and do not prescribe
+ * behaviour. Behaviour is artificial and mapped in the below sd_init()
+ * function:
*
- * Odd one out:
- * SD_ASYM_PACKING - describes SMT quirks
+ * SD_SHARE_CPUCAPACITY - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA - describes NUMA topologies
+ * SD_SHARE_POWERDOMAIN - describes shared power domain
+ * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies
+ *
+ * Odd one out, which beside describing the topology has a quirk also
+ * prescribes the desired behaviour that goes along with it:
+ *
+ * SD_ASYM_PACKING - describes SMT quirks
*/
#define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \
SD_SHARE_PKG_RESOURCES | \
SD_NUMA | \
SD_ASYM_PACKING | \
+ SD_ASYM_CPUCAPACITY | \
SD_SHARE_POWERDOMAIN)
static struct sched_domain *
-sd_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl,
+ const struct cpumask *cpu_map,
+ struct sched_domain *child, int cpu)
{
- struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
- int sd_weight, sd_flags = 0;
+ struct sd_data *sdd = &tl->data;
+ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+ int sd_id, sd_weight, sd_flags = 0;
#ifdef CONFIG_NUMA
/*
@@ -6442,15 +6503,26 @@
.smt_gain = 0,
.max_newidle_lb_cost = 0,
.next_decay_max_lb_cost = jiffies,
+ .child = child,
#ifdef CONFIG_SCHED_DEBUG
.name = tl->name,
#endif
};
+ cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+ sd_id = cpumask_first(sched_domain_span(sd));
+
/*
* Convert topological properties into behaviour.
*/
+ if (sd->flags & SD_ASYM_CPUCAPACITY) {
+ struct sched_domain *t = sd;
+
+ for_each_lower_domain(t)
+ t->flags |= SD_BALANCE_WAKE;
+ }
+
if (sd->flags & SD_SHARE_CPUCAPACITY) {
sd->flags |= SD_PREFER_SIBLING;
sd->imbalance_pct = 110;
@@ -6482,7 +6554,17 @@
sd->idle_idx = 1;
}
- sd->private = &tl->data;
+ /*
+ * For all levels sharing cache; connect a sched_domain_shared
+ * instance.
+ */
+ if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+ atomic_inc(&sd->shared->ref);
+ atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
+ }
+
+ sd->private = sdd;
return sd;
}
@@ -6509,6 +6591,9 @@
void set_sched_topology(struct sched_domain_topology_level *tl)
{
+ if (WARN_ON_ONCE(sched_smp_initialized))
+ return;
+
sched_domain_topology = tl;
}
@@ -6789,6 +6874,10 @@
if (!sdd->sd)
return -ENOMEM;
+ sdd->sds = alloc_percpu(struct sched_domain_shared *);
+ if (!sdd->sds)
+ return -ENOMEM;
+
sdd->sg = alloc_percpu(struct sched_group *);
if (!sdd->sg)
return -ENOMEM;
@@ -6799,6 +6888,7 @@
for_each_cpu(j, cpu_map) {
struct sched_domain *sd;
+ struct sched_domain_shared *sds;
struct sched_group *sg;
struct sched_group_capacity *sgc;
@@ -6809,6 +6899,13 @@
*per_cpu_ptr(sdd->sd, j) = sd;
+ sds = kzalloc_node(sizeof(struct sched_domain_shared),
+ GFP_KERNEL, cpu_to_node(j));
+ if (!sds)
+ return -ENOMEM;
+
+ *per_cpu_ptr(sdd->sds, j) = sds;
+
sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
GFP_KERNEL, cpu_to_node(j));
if (!sg)
@@ -6848,6 +6945,8 @@
kfree(*per_cpu_ptr(sdd->sd, j));
}
+ if (sdd->sds)
+ kfree(*per_cpu_ptr(sdd->sds, j));
if (sdd->sg)
kfree(*per_cpu_ptr(sdd->sg, j));
if (sdd->sgc)
@@ -6855,6 +6954,8 @@
}
free_percpu(sdd->sd);
sdd->sd = NULL;
+ free_percpu(sdd->sds);
+ sdd->sds = NULL;
free_percpu(sdd->sg);
sdd->sg = NULL;
free_percpu(sdd->sgc);
@@ -6866,16 +6967,12 @@
const struct cpumask *cpu_map, struct sched_domain_attr *attr,
struct sched_domain *child, int cpu)
{
- struct sched_domain *sd = sd_init(tl, cpu);
- if (!sd)
- return child;
+ struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
- cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
if (child) {
sd->level = child->level + 1;
sched_domain_level_max = max(sched_domain_level_max, sd->level);
child->parent = sd;
- sd->child = child;
if (!cpumask_subset(sched_domain_span(child),
sched_domain_span(sd))) {
@@ -6906,6 +7003,7 @@
enum s_alloc alloc_state;
struct sched_domain *sd;
struct s_data d;
+ struct rq *rq = NULL;
int i, ret = -ENOMEM;
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -6956,11 +7054,22 @@
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
+ rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
+
+ /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
+ if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
+ WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
+
cpu_attach_domain(sd, d.rd, i);
}
rcu_read_unlock();
+ if (rq && sched_debug_enabled) {
+ pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
+ cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
+ }
+
ret = 0;
error:
__free_domain_allocs(&d, alloc_state, cpu_map);
@@ -7319,6 +7428,22 @@
}
#endif
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+static void sched_init_smt(void)
+{
+ /*
+ * We've enumerated all CPUs and will assume that if any CPU
+ * has SMT siblings, CPU0 will too.
+ */
+ if (cpumask_weight(cpu_smt_mask(0)) > 1)
+ static_branch_enable(&sched_smt_present);
+}
+#else
+static inline void sched_init_smt(void) { }
+#endif
+
void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
@@ -7348,6 +7473,9 @@
init_sched_rt_class();
init_sched_dl_class();
+
+ sched_init_smt();
+
sched_smp_initialized = true;
}
@@ -7385,6 +7513,7 @@
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
void __init sched_init(void)
{
@@ -7421,6 +7550,8 @@
for_each_possible_cpu(i) {
per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+ per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+ cpumask_size(), GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
@@ -7523,10 +7654,6 @@
set_load_weight(&init_task);
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- INIT_HLIST_HEAD(&init_task.preempt_notifiers);
-#endif
-
/*
* The boot idle thread does lazy MMU switching as well:
*/
@@ -7534,11 +7661,6 @@
enter_lazy_tlb(&init_mm, current);
/*
- * During early bootup we pretend to be a normal task:
- */
- current->sched_class = &fair_sched_class;
-
- /*
* Make us the idle thread. Technically, schedule() should not be
* called from this thread, however somewhere below it might be,
* but because we are the idle thread, we just pick up running again
@@ -7592,6 +7714,7 @@
void ___might_sleep(const char *file, int line, int preempt_offset)
{
static unsigned long prev_jiffy; /* ratelimiting */
+ unsigned long preempt_disable_ip;
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
@@ -7602,6 +7725,9 @@
return;
prev_jiffy = jiffies;
+ /* Save this before calling printk(), since that will clobber it */
+ preempt_disable_ip = get_preempt_disable_ip(current);
+
printk(KERN_ERR
"BUG: sleeping function called from invalid context at %s:%d\n",
file, line);
@@ -7616,14 +7742,14 @@
debug_show_held_locks(current);
if (irqs_disabled())
print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (!preempt_count_equals(preempt_offset)) {
+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+ && !preempt_count_equals(preempt_offset)) {
pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
+ print_ip_sym(preempt_disable_ip);
pr_cont("\n");
}
-#endif
dump_stack();
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
EXPORT_SYMBOL(___might_sleep);
#endif
@@ -7644,12 +7770,10 @@
if (p->flags & PF_KTHREAD)
continue;
- p->se.exec_start = 0;
-#ifdef CONFIG_SCHEDSTATS
- p->se.statistics.wait_start = 0;
- p->se.statistics.sleep_start = 0;
- p->se.statistics.block_start = 0;
-#endif
+ p->se.exec_start = 0;
+ schedstat_set(p->se.statistics.wait_start, 0);
+ schedstat_set(p->se.statistics.sleep_start, 0);
+ schedstat_set(p->se.statistics.block_start, 0);
if (!dl_task(p) && !rt_task(p)) {
/*
@@ -7710,7 +7834,7 @@
*
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
*/
-void set_curr_task(int cpu, struct task_struct *p)
+void ia64_set_curr_task(int cpu, struct task_struct *p)
{
cpu_curr(cpu) = p;
}
@@ -7841,10 +7965,10 @@
sched_change_group(tsk, TASK_MOVE_GROUP);
- if (unlikely(running))
- tsk->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
+ if (unlikely(running))
+ set_curr_task(rq, tsk);
task_rq_unlock(rq, tsk, &rf);
}
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index d418449..e731190 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -31,56 +31,81 @@
return (i << 1) + 2;
}
-static void cpudl_exchange(struct cpudl *cp, int a, int b)
+static void cpudl_heapify_down(struct cpudl *cp, int idx)
{
- int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
+ int l, r, largest;
- swap(cp->elements[a].cpu, cp->elements[b].cpu);
- swap(cp->elements[a].dl , cp->elements[b].dl );
+ int orig_cpu = cp->elements[idx].cpu;
+ u64 orig_dl = cp->elements[idx].dl;
- swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
+ if (left_child(idx) >= cp->size)
+ return;
+
+ /* adapted from lib/prio_heap.c */
+ while(1) {
+ u64 largest_dl;
+ l = left_child(idx);
+ r = right_child(idx);
+ largest = idx;
+ largest_dl = orig_dl;
+
+ if ((l < cp->size) && dl_time_before(orig_dl,
+ cp->elements[l].dl)) {
+ largest = l;
+ largest_dl = cp->elements[l].dl;
+ }
+ if ((r < cp->size) && dl_time_before(largest_dl,
+ cp->elements[r].dl))
+ largest = r;
+
+ if (largest == idx)
+ break;
+
+ /* pull largest child onto idx */
+ cp->elements[idx].cpu = cp->elements[largest].cpu;
+ cp->elements[idx].dl = cp->elements[largest].dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
+ idx = largest;
+ }
+ /* actual push down of saved original values orig_* */
+ cp->elements[idx].cpu = orig_cpu;
+ cp->elements[idx].dl = orig_dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
+}
+
+static void cpudl_heapify_up(struct cpudl *cp, int idx)
+{
+ int p;
+
+ int orig_cpu = cp->elements[idx].cpu;
+ u64 orig_dl = cp->elements[idx].dl;
+
+ if (idx == 0)
+ return;
+
+ do {
+ p = parent(idx);
+ if (dl_time_before(orig_dl, cp->elements[p].dl))
+ break;
+ /* pull parent onto idx */
+ cp->elements[idx].cpu = cp->elements[p].cpu;
+ cp->elements[idx].dl = cp->elements[p].dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
+ idx = p;
+ } while (idx != 0);
+ /* actual push up of saved original values orig_* */
+ cp->elements[idx].cpu = orig_cpu;
+ cp->elements[idx].dl = orig_dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
}
static void cpudl_heapify(struct cpudl *cp, int idx)
{
- int l, r, largest;
-
- /* adapted from lib/prio_heap.c */
- while(1) {
- l = left_child(idx);
- r = right_child(idx);
- largest = idx;
-
- if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
- cp->elements[l].dl))
- largest = l;
- if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
- cp->elements[r].dl))
- largest = r;
- if (largest == idx)
- break;
-
- /* Push idx down the heap one level and bump one up */
- cpudl_exchange(cp, largest, idx);
- idx = largest;
- }
-}
-
-static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
-{
- WARN_ON(idx == IDX_INVALID || !cpu_present(idx));
-
- if (dl_time_before(new_dl, cp->elements[idx].dl)) {
- cp->elements[idx].dl = new_dl;
- cpudl_heapify(cp, idx);
- } else {
- cp->elements[idx].dl = new_dl;
- while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
- cp->elements[idx].dl)) {
- cpudl_exchange(cp, idx, parent(idx));
- idx = parent(idx);
- }
- }
+ if (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
+ cp->elements[idx].dl))
+ cpudl_heapify_up(cp, idx);
+ else
+ cpudl_heapify_down(cp, idx);
}
static inline int cpudl_maximum(struct cpudl *cp)
@@ -120,6 +145,45 @@
}
/*
+ * cpudl_clear - remove a cpu from the cpudl max-heap
+ * @cp: the cpudl max-heap context
+ * @cpu: the target cpu
+ *
+ * Notes: assumes cpu_rq(cpu)->lock is locked
+ *
+ * Returns: (void)
+ */
+void cpudl_clear(struct cpudl *cp, int cpu)
+{
+ int old_idx, new_cpu;
+ unsigned long flags;
+
+ WARN_ON(!cpu_present(cpu));
+
+ raw_spin_lock_irqsave(&cp->lock, flags);
+
+ old_idx = cp->elements[cpu].idx;
+ if (old_idx == IDX_INVALID) {
+ /*
+ * Nothing to remove if old_idx was invalid.
+ * This could happen if a rq_offline_dl is
+ * called for a CPU without -dl tasks running.
+ */
+ } else {
+ new_cpu = cp->elements[cp->size - 1].cpu;
+ cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
+ cp->elements[old_idx].cpu = new_cpu;
+ cp->size--;
+ cp->elements[new_cpu].idx = old_idx;
+ cp->elements[cpu].idx = IDX_INVALID;
+ cpudl_heapify(cp, old_idx);
+
+ cpumask_set_cpu(cpu, cp->free_cpus);
+ }
+ raw_spin_unlock_irqrestore(&cp->lock, flags);
+}
+
+/*
* cpudl_set - update the cpudl max-heap
* @cp: the cpudl max-heap context
* @cpu: the target cpu
@@ -129,55 +193,28 @@
*
* Returns: (void)
*/
-void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
{
- int old_idx, new_cpu;
+ int old_idx;
unsigned long flags;
WARN_ON(!cpu_present(cpu));
raw_spin_lock_irqsave(&cp->lock, flags);
+
old_idx = cp->elements[cpu].idx;
- if (!is_valid) {
- /* remove item */
- if (old_idx == IDX_INVALID) {
- /*
- * Nothing to remove if old_idx was invalid.
- * This could happen if a rq_offline_dl is
- * called for a CPU without -dl tasks running.
- */
- goto out;
- }
- new_cpu = cp->elements[cp->size - 1].cpu;
- cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
- cp->elements[old_idx].cpu = new_cpu;
- cp->size--;
- cp->elements[new_cpu].idx = old_idx;
- cp->elements[cpu].idx = IDX_INVALID;
- while (old_idx > 0 && dl_time_before(
- cp->elements[parent(old_idx)].dl,
- cp->elements[old_idx].dl)) {
- cpudl_exchange(cp, old_idx, parent(old_idx));
- old_idx = parent(old_idx);
- }
- cpumask_set_cpu(cpu, cp->free_cpus);
- cpudl_heapify(cp, old_idx);
-
- goto out;
- }
-
if (old_idx == IDX_INVALID) {
- cp->size++;
- cp->elements[cp->size - 1].dl = dl;
- cp->elements[cp->size - 1].cpu = cpu;
- cp->elements[cpu].idx = cp->size - 1;
- cpudl_change_key(cp, cp->size - 1, dl);
+ int new_idx = cp->size++;
+ cp->elements[new_idx].dl = dl;
+ cp->elements[new_idx].cpu = cpu;
+ cp->elements[cpu].idx = new_idx;
+ cpudl_heapify_up(cp, new_idx);
cpumask_clear_cpu(cpu, cp->free_cpus);
} else {
- cpudl_change_key(cp, old_idx, dl);
+ cp->elements[old_idx].dl = dl;
+ cpudl_heapify(cp, old_idx);
}
-out:
raw_spin_unlock_irqrestore(&cp->lock, flags);
}
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index fcbdf83..f7da8c5 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -23,7 +23,8 @@
#ifdef CONFIG_SMP
int cpudl_find(struct cpudl *cp, struct task_struct *p,
struct cpumask *later_mask);
-void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
+void cpudl_clear(struct cpudl *cp, int cpu);
int cpudl_init(struct cpudl *cp);
void cpudl_set_freecpu(struct cpudl *cp, int cpu);
void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954..dbc5144 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@
*/
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max))
+ unsigned int flags))
{
if (WARN_ON(!data || !func))
return;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b..69e0689 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpufreq.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>
@@ -48,11 +47,14 @@
struct sugov_policy *sg_policy;
unsigned int cached_raw_freq;
+ unsigned long iowait_boost;
+ unsigned long iowait_boost_max;
+ u64 last_update;
/* The fields below are only needed when sharing a policy. */
unsigned long util;
unsigned long max;
- u64 last_update;
+ unsigned int flags;
};
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@
return cpufreq_driver_resolve_freq(policy, freq);
}
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+ struct rq *rq = this_rq();
+ unsigned long cfs_max;
+
+ cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+ *util = min(rq->cfs.avg.util_avg, cfs_max);
+ *max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned int flags)
+{
+ if (flags & SCHED_CPUFREQ_IOWAIT) {
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ } else if (sg_cpu->iowait_boost) {
+ s64 delta_ns = time - sg_cpu->last_update;
+
+ /* Clear iowait_boost if the CPU apprears to have been idle. */
+ if (delta_ns > TICK_NSEC)
+ sg_cpu->iowait_boost = 0;
+ }
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+ unsigned long *max)
+{
+ unsigned long boost_util = sg_cpu->iowait_boost;
+ unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+ if (!boost_util)
+ return;
+
+ if (*util * boost_max < *max * boost_util) {
+ *util = boost_util;
+ *max = boost_max;
+ }
+ sg_cpu->iowait_boost >>= 1;
+}
+
static void sugov_update_single(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_set_iowait_boost(sg_cpu, time, flags);
+ sg_cpu->last_update = time;
+
if (!sugov_should_update_freq(sg_policy, time))
return;
- next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
- get_next_freq(sg_cpu, util, max);
+ if (flags & SCHED_CPUFREQ_RT_DL) {
+ next_f = policy->cpuinfo.max_freq;
+ } else {
+ sugov_get_util(&util, &max);
+ sugov_iowait_boost(sg_cpu, &util, &max);
+ next_f = get_next_freq(sg_cpu, util, max);
+ }
sugov_update_commit(sg_policy, time, next_f);
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
- unsigned long util, unsigned long max)
+ unsigned long util, unsigned long max,
+ unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@
u64 last_freq_update_time = sg_policy->last_freq_update_time;
unsigned int j;
- if (util == ULONG_MAX)
+ if (flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ sugov_iowait_boost(sg_cpu, &util, &max);
+
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu;
unsigned long j_util, j_max;
@@ -186,41 +241,50 @@
* frequency update and the time elapsed between the last update
* of the CPU utilization and the last frequency update is long
* enough, don't take the CPU into account as it probably is
- * idle now.
+ * idle now (and clear iowait_boost for it).
*/
delta_ns = last_freq_update_time - j_sg_cpu->last_update;
- if (delta_ns > TICK_NSEC)
+ if (delta_ns > TICK_NSEC) {
+ j_sg_cpu->iowait_boost = 0;
continue;
-
- j_util = j_sg_cpu->util;
- if (j_util == ULONG_MAX)
+ }
+ if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
if (j_util * max > j_max * util) {
util = j_util;
max = j_max;
}
+
+ sugov_iowait_boost(j_sg_cpu, &util, &max);
}
return get_next_freq(sg_cpu, util, max);
}
static void sugov_update_shared(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_get_util(&util, &max);
+
raw_spin_lock(&sg_policy->update_lock);
sg_cpu->util = util;
sg_cpu->max = max;
+ sg_cpu->flags = flags;
+
+ sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
if (sugov_should_update_freq(sg_policy, time)) {
- next_f = sugov_next_freq_shared(sg_cpu, util, max);
+ next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
sugov_update_commit(sg_policy, time, next_f);
}
@@ -444,10 +508,13 @@
sg_cpu->sg_policy = sg_policy;
if (policy_is_shared(policy)) {
- sg_cpu->util = ULONG_MAX;
+ sg_cpu->util = 0;
sg_cpu->max = 0;
+ sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
sg_cpu->cached_raw_freq = 0;
+ sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_shared);
} else {
@@ -495,28 +562,15 @@
.limits = sugov_limits,
};
-static int __init sugov_module_init(void)
-{
- return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
- cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &schedutil_gov;
}
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
#endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+ return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a846cf8..5ebee31 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -23,10 +23,8 @@
* task when irq is in progress while we read rq->clock. That is a worthy
* compromise in place of having locks on each irq in account_system_time.
*/
-DEFINE_PER_CPU(u64, cpu_hardirq_time);
-DEFINE_PER_CPU(u64, cpu_softirq_time);
+DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
-static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void)
@@ -39,16 +37,13 @@
sched_clock_irqtime = 0;
}
-#ifndef CONFIG_64BIT
-DEFINE_PER_CPU(seqcount_t, irq_time_seq);
-#endif /* CONFIG_64BIT */
-
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
*/
void irqtime_account_irq(struct task_struct *curr)
{
+ struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
s64 delta;
int cpu;
@@ -56,10 +51,10 @@
return;
cpu = smp_processor_id();
- delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
- __this_cpu_add(irq_start_time, delta);
+ delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
+ irqtime->irq_start_time += delta;
- irq_time_write_begin();
+ u64_stats_update_begin(&irqtime->sync);
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
@@ -67,42 +62,36 @@
* that do not consume any time, but still wants to run.
*/
if (hardirq_count())
- __this_cpu_add(cpu_hardirq_time, delta);
+ irqtime->hardirq_time += delta;
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
- __this_cpu_add(cpu_softirq_time, delta);
+ irqtime->softirq_time += delta;
- irq_time_write_end();
+ u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
-static cputime_t irqtime_account_hi_update(cputime_t maxtime)
+static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
- unsigned long flags;
cputime_t irq_cputime;
- local_irq_save(flags);
- irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
- cpustat[CPUTIME_IRQ];
+ irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
irq_cputime = min(irq_cputime, maxtime);
- cpustat[CPUTIME_IRQ] += irq_cputime;
- local_irq_restore(flags);
+ cpustat[idx] += irq_cputime;
+
return irq_cputime;
}
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
+{
+ return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
+ CPUTIME_IRQ, maxtime);
+}
+
static cputime_t irqtime_account_si_update(cputime_t maxtime)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- unsigned long flags;
- cputime_t softirq_cputime;
-
- local_irq_save(flags);
- softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
- cpustat[CPUTIME_SOFTIRQ];
- softirq_cputime = min(softirq_cputime, maxtime);
- cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
- local_irq_restore(flags);
- return softirq_cputime;
+ return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
+ CPUTIME_SOFTIRQ, maxtime);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
@@ -295,6 +284,9 @@
{
cputime_t accounted;
+ /* Shall be converted to a lockdep-enabled lightweight check */
+ WARN_ON_ONCE(!irqs_disabled());
+
accounted = steal_account_process_time(max);
if (accounted < max)
@@ -306,6 +298,26 @@
return accounted;
}
+#ifdef CONFIG_64BIT
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+ return t->se.sum_exec_runtime;
+}
+#else
+static u64 read_sum_exec_runtime(struct task_struct *t)
+{
+ u64 ns;
+ struct rq_flags rf;
+ struct rq *rq;
+
+ rq = task_rq_lock(t, &rf);
+ ns = t->se.sum_exec_runtime;
+ task_rq_unlock(rq, t, &rf);
+
+ return ns;
+}
+#endif
+
/*
* Accumulate raw cputime values of dead tasks (sig->[us]time) and live
* tasks (sum on group iteration) belonging to @tsk's group.
@@ -318,6 +330,17 @@
unsigned int seq, nextseq;
unsigned long flags;
+ /*
+ * Update current task runtime to account pending time since last
+ * scheduler action or thread_group_cputime() call. This thread group
+ * might have other running tasks on different CPUs, but updating
+ * their runtime can affect syscall performance, so we skip account
+ * those pending times and rely only on values updated on tick or
+ * other scheduler action.
+ */
+ if (same_thread_group(current, tsk))
+ (void) task_sched_runtime(current);
+
rcu_read_lock();
/* Attempt a lockless read on the first round. */
nextseq = 0;
@@ -332,7 +355,7 @@
task_cputime(t, &utime, &stime);
times->utime += utime;
times->stime += stime;
- times->sum_exec_runtime += task_sched_runtime(t);
+ times->sum_exec_runtime += read_sum_exec_runtime(t);
}
/* If lockless access failed, take the lock. */
nextseq = 1;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867..37e2449 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -243,10 +243,8 @@
static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
{
struct rq *later_rq = NULL;
- bool fallback = false;
later_rq = find_lock_later_rq(p, rq);
-
if (!later_rq) {
int cpu;
@@ -254,7 +252,6 @@
* If we cannot preempt any rq, fall back to pick any
* online cpu.
*/
- fallback = true;
cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
if (cpu >= nr_cpu_ids) {
/*
@@ -274,16 +271,7 @@
double_lock_balance(rq, later_rq);
}
- /*
- * By now the task is replenished and enqueued; migrate it.
- */
- deactivate_task(rq, p, 0);
set_task_cpu(p, later_rq->cpu);
- activate_task(later_rq, p, 0);
-
- if (!fallback)
- resched_curr(later_rq);
-
double_unlock_balance(later_rq, rq);
return later_rq;
@@ -346,12 +334,12 @@
* one, and to (try to!) reconcile itself with its own scheduling
* parameters.
*/
-static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
- struct sched_dl_entity *pi_se)
+static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
+ WARN_ON(dl_se->dl_boosted);
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
/*
@@ -367,8 +355,8 @@
* future; in fact, we must consider execution overheads (time
* spent on hardirq context, etc.).
*/
- dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
- dl_se->runtime = pi_se->dl_runtime;
+ dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
+ dl_se->runtime = dl_se->dl_runtime;
}
/*
@@ -641,6 +629,24 @@
goto unlock;
}
+#ifdef CONFIG_SMP
+ if (unlikely(!rq->online)) {
+ /*
+ * If the runqueue is no longer available, migrate the
+ * task elsewhere. This necessarily changes rq.
+ */
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
+ rq = dl_task_offline_migration(rq, p);
+ rf.cookie = lockdep_pin_lock(&rq->lock);
+
+ /*
+ * Now that the task has been migrated to the new RQ and we
+ * have that locked, proceed as normal and enqueue the task
+ * there.
+ */
+ }
+#endif
+
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
if (dl_task(rq->curr))
check_preempt_curr_dl(rq, p, 0);
@@ -649,22 +655,6 @@
#ifdef CONFIG_SMP
/*
- * Perform balancing operations here; after the replenishments. We
- * cannot drop rq->lock before this, otherwise the assertion in
- * start_dl_timer() about not missing updates is not true.
- *
- * If we find that the rq the task was on is no longer available, we
- * need to select a new rq.
- *
- * XXX figure out if select_task_rq_dl() deals with offline cpus.
- */
- if (unlikely(!rq->online)) {
- lockdep_unpin_lock(&rq->lock, rf.cookie);
- rq = dl_task_offline_migration(rq, p);
- rf.cookie = lockdep_pin_lock(&rq->lock);
- }
-
- /*
* Queueing this task back might have overloaded rq, check if we need
* to kick someone away.
*/
@@ -735,9 +725,8 @@
return;
}
- /* kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
@@ -798,7 +787,7 @@
if (dl_rq->earliest_dl.curr == 0 ||
dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
dl_rq->earliest_dl.curr = deadline;
- cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
+ cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
}
}
@@ -813,14 +802,14 @@
if (!dl_rq->dl_nr_running) {
dl_rq->earliest_dl.curr = 0;
dl_rq->earliest_dl.next = 0;
- cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu);
} else {
struct rb_node *leftmost = dl_rq->rb_leftmost;
struct sched_dl_entity *entry;
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline;
- cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
+ cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
}
}
@@ -1671,7 +1660,7 @@
cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
if (rq->dl.dl_nr_running > 0)
- cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
+ cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
}
/* Assumes rq->lock is held */
@@ -1680,7 +1669,7 @@
if (rq->dl.overloaded)
dl_clear_overload(rq);
- cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu);
cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}
@@ -1723,10 +1712,20 @@
*/
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
- if (dl_time_before(p->dl.deadline, rq_clock(rq)))
- setup_new_dl_entity(&p->dl, &p->dl);
- if (task_on_rq_queued(p) && rq->curr != p) {
+ /* If p is not queued we will update its parameters at next wakeup. */
+ if (!task_on_rq_queued(p))
+ return;
+
+ /*
+ * If p is boosted we already updated its params in
+ * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
+ * p's deadline being now already after rq_clock(rq).
+ */
+ if (dl_time_before(p->dl.deadline, rq_clock(rq)))
+ setup_new_dl_entity(&p->dl);
+
+ if (rq->curr != p) {
#ifdef CONFIG_SMP
if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
queue_push_tasks(rq);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2a0a999..1393588 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -369,8 +369,12 @@
#define P(F) \
SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) \
+ SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
#define PN(F) \
SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) \
+ SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
if (!se)
return;
@@ -378,26 +382,27 @@
PN(se->exec_start);
PN(se->vruntime);
PN(se->sum_exec_runtime);
-#ifdef CONFIG_SCHEDSTATS
if (schedstat_enabled()) {
- PN(se->statistics.wait_start);
- PN(se->statistics.sleep_start);
- PN(se->statistics.block_start);
- PN(se->statistics.sleep_max);
- PN(se->statistics.block_max);
- PN(se->statistics.exec_max);
- PN(se->statistics.slice_max);
- PN(se->statistics.wait_max);
- PN(se->statistics.wait_sum);
- P(se->statistics.wait_count);
+ PN_SCHEDSTAT(se->statistics.wait_start);
+ PN_SCHEDSTAT(se->statistics.sleep_start);
+ PN_SCHEDSTAT(se->statistics.block_start);
+ PN_SCHEDSTAT(se->statistics.sleep_max);
+ PN_SCHEDSTAT(se->statistics.block_max);
+ PN_SCHEDSTAT(se->statistics.exec_max);
+ PN_SCHEDSTAT(se->statistics.slice_max);
+ PN_SCHEDSTAT(se->statistics.wait_max);
+ PN_SCHEDSTAT(se->statistics.wait_sum);
+ P_SCHEDSTAT(se->statistics.wait_count);
}
-#endif
P(se->load.weight);
#ifdef CONFIG_SMP
P(se->avg.load_avg);
P(se->avg.util_avg);
#endif
+
+#undef PN_SCHEDSTAT
#undef PN
+#undef P_SCHEDSTAT
#undef P
}
#endif
@@ -429,9 +434,9 @@
p->prio);
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
- SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
+ SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
SPLIT_NS(p->se.sum_exec_runtime),
- SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+ SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
#ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -626,9 +631,7 @@
#undef P64
#endif
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n));
if (schedstat_enabled()) {
P(yld_count);
P(sched_count);
@@ -636,9 +639,8 @@
P(ttwu_count);
P(ttwu_local);
}
-
#undef P
-#endif
+
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
@@ -868,10 +870,14 @@
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define P_SCHEDSTAT(F) \
+ SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
#define __PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define PN_SCHEDSTAT(F) \
+ SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
PN(se.exec_start);
PN(se.vruntime);
@@ -881,37 +887,36 @@
P(se.nr_migrations);
-#ifdef CONFIG_SCHEDSTATS
if (schedstat_enabled()) {
u64 avg_atom, avg_per_cpu;
- PN(se.statistics.sum_sleep_runtime);
- PN(se.statistics.wait_start);
- PN(se.statistics.sleep_start);
- PN(se.statistics.block_start);
- PN(se.statistics.sleep_max);
- PN(se.statistics.block_max);
- PN(se.statistics.exec_max);
- PN(se.statistics.slice_max);
- PN(se.statistics.wait_max);
- PN(se.statistics.wait_sum);
- P(se.statistics.wait_count);
- PN(se.statistics.iowait_sum);
- P(se.statistics.iowait_count);
- P(se.statistics.nr_migrations_cold);
- P(se.statistics.nr_failed_migrations_affine);
- P(se.statistics.nr_failed_migrations_running);
- P(se.statistics.nr_failed_migrations_hot);
- P(se.statistics.nr_forced_migrations);
- P(se.statistics.nr_wakeups);
- P(se.statistics.nr_wakeups_sync);
- P(se.statistics.nr_wakeups_migrate);
- P(se.statistics.nr_wakeups_local);
- P(se.statistics.nr_wakeups_remote);
- P(se.statistics.nr_wakeups_affine);
- P(se.statistics.nr_wakeups_affine_attempts);
- P(se.statistics.nr_wakeups_passive);
- P(se.statistics.nr_wakeups_idle);
+ PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+ PN_SCHEDSTAT(se.statistics.wait_start);
+ PN_SCHEDSTAT(se.statistics.sleep_start);
+ PN_SCHEDSTAT(se.statistics.block_start);
+ PN_SCHEDSTAT(se.statistics.sleep_max);
+ PN_SCHEDSTAT(se.statistics.block_max);
+ PN_SCHEDSTAT(se.statistics.exec_max);
+ PN_SCHEDSTAT(se.statistics.slice_max);
+ PN_SCHEDSTAT(se.statistics.wait_max);
+ PN_SCHEDSTAT(se.statistics.wait_sum);
+ P_SCHEDSTAT(se.statistics.wait_count);
+ PN_SCHEDSTAT(se.statistics.iowait_sum);
+ P_SCHEDSTAT(se.statistics.iowait_count);
+ P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+ P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+ P_SCHEDSTAT(se.statistics.nr_wakeups);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
avg_atom = p->se.sum_exec_runtime;
if (nr_switches)
@@ -930,7 +935,7 @@
__PN(avg_atom);
__PN(avg_per_cpu);
}
-#endif
+
__P(nr_switches);
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_voluntary_switches", (long long)p->nvcsw);
@@ -947,8 +952,10 @@
#endif
P(policy);
P(prio);
+#undef PN_SCHEDSTAT
#undef PN
#undef __PN
+#undef P_SCHEDSTAT
#undef P
#undef __P
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34..502e95a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -114,6 +114,12 @@
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+/*
+ * The margin used when comparing utilization with CPU capacity:
+ * util * 1024 < capacity * margin
+ */
+unsigned int capacity_margin = 1280; /* ~20% */
+
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
@@ -256,9 +262,7 @@
static inline struct task_struct *task_of(struct sched_entity *se)
{
-#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(!entity_is_task(se));
-#endif
+ SCHED_WARN_ON(!entity_is_task(se));
return container_of(se, struct task_struct, se);
}
@@ -456,17 +460,23 @@
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
+ struct sched_entity *curr = cfs_rq->curr;
+
u64 vruntime = cfs_rq->min_vruntime;
- if (cfs_rq->curr)
- vruntime = cfs_rq->curr->vruntime;
+ if (curr) {
+ if (curr->on_rq)
+ vruntime = curr->vruntime;
+ else
+ curr = NULL;
+ }
if (cfs_rq->rb_leftmost) {
struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
struct sched_entity,
run_node);
- if (!cfs_rq->curr)
+ if (!curr)
vruntime = se->vruntime;
else
vruntime = min_vruntime(vruntime, se->vruntime);
@@ -656,7 +666,7 @@
}
#ifdef CONFIG_SMP
-static int select_idle_sibling(struct task_struct *p, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);
/*
@@ -726,7 +736,6 @@
struct sched_avg *sa = &se->avg;
long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
u64 now = cfs_rq_clock_task(cfs_rq);
- int tg_update;
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
@@ -759,10 +768,9 @@
}
}
- tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+ update_cfs_rq_load_avg(now, cfs_rq, false);
attach_entity_load_avg(cfs_rq, se);
- if (tg_update)
- update_tg_load_avg(cfs_rq, false);
+ update_tg_load_avg(cfs_rq, false);
}
#else /* !CONFIG_SMP */
@@ -799,7 +807,7 @@
max(delta_exec, curr->statistics.exec_max));
curr->sum_exec_runtime += delta_exec;
- schedstat_add(cfs_rq, exec_clock, delta_exec);
+ schedstat_add(cfs_rq->exec_clock, delta_exec);
curr->vruntime += calc_delta_fair(delta_exec, curr);
update_min_vruntime(cfs_rq);
@@ -820,26 +828,34 @@
update_curr(cfs_rq_of(&rq->curr->se));
}
-#ifdef CONFIG_SCHEDSTATS
static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u64 wait_start = rq_clock(rq_of(cfs_rq));
+ u64 wait_start, prev_wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = rq_clock(rq_of(cfs_rq));
+ prev_wait_start = schedstat_val(se->statistics.wait_start);
if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
- likely(wait_start > se->statistics.wait_start))
- wait_start -= se->statistics.wait_start;
+ likely(wait_start > prev_wait_start))
+ wait_start -= prev_wait_start;
- se->statistics.wait_start = wait_start;
+ schedstat_set(se->statistics.wait_start, wait_start);
}
-static void
+static inline void
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct task_struct *p;
u64 delta;
- delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+ if (!schedstat_enabled())
+ return;
+
+ delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
if (entity_is_task(se)) {
p = task_of(se);
@@ -849,35 +865,114 @@
* time stamp can be adjusted to accumulate wait time
* prior to migration.
*/
- se->statistics.wait_start = delta;
+ schedstat_set(se->statistics.wait_start, delta);
return;
}
trace_sched_stat_wait(p, delta);
}
- se->statistics.wait_max = max(se->statistics.wait_max, delta);
- se->statistics.wait_count++;
- se->statistics.wait_sum += delta;
- se->statistics.wait_start = 0;
+ schedstat_set(se->statistics.wait_max,
+ max(schedstat_val(se->statistics.wait_max), delta));
+ schedstat_inc(se->statistics.wait_count);
+ schedstat_add(se->statistics.wait_sum, delta);
+ schedstat_set(se->statistics.wait_start, 0);
+}
+
+static inline void
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct task_struct *tsk = NULL;
+ u64 sleep_start, block_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ sleep_start = schedstat_val(se->statistics.sleep_start);
+ block_start = schedstat_val(se->statistics.block_start);
+
+ if (entity_is_task(se))
+ tsk = task_of(se);
+
+ if (sleep_start) {
+ u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+ schedstat_set(se->statistics.sleep_max, delta);
+
+ schedstat_set(se->statistics.sleep_start, 0);
+ schedstat_add(se->statistics.sum_sleep_runtime, delta);
+
+ if (tsk) {
+ account_scheduler_latency(tsk, delta >> 10, 1);
+ trace_sched_stat_sleep(tsk, delta);
+ }
+ }
+ if (block_start) {
+ u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+ schedstat_set(se->statistics.block_max, delta);
+
+ schedstat_set(se->statistics.block_start, 0);
+ schedstat_add(se->statistics.sum_sleep_runtime, delta);
+
+ if (tsk) {
+ if (tsk->in_iowait) {
+ schedstat_add(se->statistics.iowait_sum, delta);
+ schedstat_inc(se->statistics.iowait_count);
+ trace_sched_stat_iowait(tsk, delta);
+ }
+
+ trace_sched_stat_blocked(tsk, delta);
+
+ /*
+ * Blocking time is in units of nanosecs, so shift by
+ * 20 to get a milliseconds-range estimation of the
+ * amount of time that the task spent sleeping:
+ */
+ if (unlikely(prof_on == SLEEP_PROFILING)) {
+ profile_hits(SLEEP_PROFILING,
+ (void *)get_wchan(tsk),
+ delta >> 20);
+ }
+ account_scheduler_latency(tsk, delta >> 10, 0);
+ }
+ }
}
/*
* Task is being enqueued - update stats:
*/
static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+ if (!schedstat_enabled())
+ return;
+
/*
* Are we enqueueing a waiting task? (for current tasks
* a dequeue/enqueue event is a NOP)
*/
if (se != cfs_rq->curr)
update_stats_wait_start(cfs_rq, se);
+
+ if (flags & ENQUEUE_WAKEUP)
+ update_stats_enqueue_sleeper(cfs_rq, se);
}
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+
+ if (!schedstat_enabled())
+ return;
+
/*
* Mark the end of the wait period if dequeueing a
* waiting task:
@@ -885,39 +980,17 @@
if (se != cfs_rq->curr)
update_stats_wait_end(cfs_rq, se);
- if (flags & DEQUEUE_SLEEP) {
- if (entity_is_task(se)) {
- struct task_struct *tsk = task_of(se);
+ if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+ struct task_struct *tsk = task_of(se);
- if (tsk->state & TASK_INTERRUPTIBLE)
- se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
- if (tsk->state & TASK_UNINTERRUPTIBLE)
- se->statistics.block_start = rq_clock(rq_of(cfs_rq));
- }
+ if (tsk->state & TASK_INTERRUPTIBLE)
+ schedstat_set(se->statistics.sleep_start,
+ rq_clock(rq_of(cfs_rq)));
+ if (tsk->state & TASK_UNINTERRUPTIBLE)
+ schedstat_set(se->statistics.block_start,
+ rq_clock(rq_of(cfs_rq)));
}
-
}
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-#endif
/*
* We are picking a new current task - update its stats:
@@ -1513,8 +1586,16 @@
* One idle CPU per node is evaluated for a task numa move.
* Call select_idle_sibling to maybe find a better one.
*/
- if (!cur)
- env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+ if (!cur) {
+ /*
+ * select_idle_siblings() uses an per-cpu cpumask that
+ * can be used from IRQ context.
+ */
+ local_irq_disable();
+ env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+ env->dst_cpu);
+ local_irq_enable();
+ }
assign:
task_numa_assign(env, cur, imp);
@@ -2292,7 +2373,7 @@
unsigned long nr_pte_updates = 0;
long pages, virtpages;
- WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+ SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
work->next = work; /* protect against double add */
/*
@@ -2803,9 +2884,21 @@
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * Updating tg's load_avg is necessary before update_cfs_share (which is done)
- * and effective_load (which is not done because it is too costly).
+/**
+ * update_tg_load_avg - update the tg's load avg
+ * @cfs_rq: the cfs_rq whose avg changed
+ * @force: update regardless of how small the difference
+ *
+ * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
+ * However, because tg->load_avg is a global value there are performance
+ * considerations.
+ *
+ * In order to avoid having to look at the other cfs_rq's, we use a
+ * differential update where we store the last value we propagated. This in
+ * turn allows skipping updates if the differential is 'small'.
+ *
+ * Updating tg's load_avg is necessary before update_cfs_share() (which is
+ * done) and effective_load() (which is not done because it is too costly).
*/
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
{
@@ -2875,12 +2968,7 @@
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
- struct rq *rq = rq_of(cfs_rq);
- int cpu = cpu_of(rq);
-
- if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
- unsigned long max = rq->cpu_capacity_orig;
-
+ if (&this_rq()->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
@@ -2897,8 +2985,7 @@
*
* See cpu_util().
*/
- cpufreq_update_util(rq_clock(rq),
- min(cfs_rq->avg.util_avg, max), max);
+ cpufreq_update_util(rq_of(cfs_rq), 0);
}
}
@@ -2931,10 +3018,10 @@
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
- * Returns true if the load decayed or we removed utilization. It is expected
- * that one calls update_tg_load_avg() on this condition, but after you've
- * modified the cfs_rq avg (attach/detach), such that we propagate the new
- * avg up.
+ * Returns true if the load decayed or we removed load.
+ *
+ * Since both these conditions indicate a changed cfs_rq->avg.load we should
+ * call update_tg_load_avg() when this function returns true.
*/
static inline int
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@ -3159,10 +3246,7 @@
static inline void update_load_avg(struct sched_entity *se, int not_used)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- struct rq *rq = rq_of(cfs_rq);
-
- cpufreq_trigger_update(rq_clock(rq));
+ cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
}
static inline void
@@ -3183,68 +3267,6 @@
#endif /* CONFIG_SMP */
-static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHEDSTATS
- struct task_struct *tsk = NULL;
-
- if (entity_is_task(se))
- tsk = task_of(se);
-
- if (se->statistics.sleep_start) {
- u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
-
- if ((s64)delta < 0)
- delta = 0;
-
- if (unlikely(delta > se->statistics.sleep_max))
- se->statistics.sleep_max = delta;
-
- se->statistics.sleep_start = 0;
- se->statistics.sum_sleep_runtime += delta;
-
- if (tsk) {
- account_scheduler_latency(tsk, delta >> 10, 1);
- trace_sched_stat_sleep(tsk, delta);
- }
- }
- if (se->statistics.block_start) {
- u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
-
- if ((s64)delta < 0)
- delta = 0;
-
- if (unlikely(delta > se->statistics.block_max))
- se->statistics.block_max = delta;
-
- se->statistics.block_start = 0;
- se->statistics.sum_sleep_runtime += delta;
-
- if (tsk) {
- if (tsk->in_iowait) {
- se->statistics.iowait_sum += delta;
- se->statistics.iowait_count++;
- trace_sched_stat_iowait(tsk, delta);
- }
-
- trace_sched_stat_blocked(tsk, delta);
-
- /*
- * Blocking time is in units of nanosecs, so shift by
- * 20 to get a milliseconds-range estimation of the
- * amount of time that the task spent sleeping:
- */
- if (unlikely(prof_on == SLEEP_PROFILING)) {
- profile_hits(SLEEP_PROFILING,
- (void *)get_wchan(tsk),
- delta >> 20);
- }
- account_scheduler_latency(tsk, delta >> 10, 0);
- }
- }
-#endif
-}
-
static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
@@ -3254,7 +3276,7 @@
d = -d;
if (d > 3*sysctl_sched_latency)
- schedstat_inc(cfs_rq, nr_spread_over);
+ schedstat_inc(cfs_rq->nr_spread_over);
#endif
}
@@ -3371,17 +3393,12 @@
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
- if (flags & ENQUEUE_WAKEUP) {
+ if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
- if (schedstat_enabled())
- enqueue_sleeper(cfs_rq, se);
- }
check_schedstat_required();
- if (schedstat_enabled()) {
- update_stats_enqueue(cfs_rq, se);
- check_spread(cfs_rq, se);
- }
+ update_stats_enqueue(cfs_rq, se, flags);
+ check_spread(cfs_rq, se);
if (!curr)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
@@ -3448,8 +3465,7 @@
update_curr(cfs_rq);
dequeue_entity_load_avg(cfs_rq, se);
- if (schedstat_enabled())
- update_stats_dequeue(cfs_rq, se, flags);
+ update_stats_dequeue(cfs_rq, se, flags);
clear_buddies(cfs_rq, se);
@@ -3459,9 +3475,10 @@
account_entity_dequeue(cfs_rq, se);
/*
- * Normalize the entity after updating the min_vruntime because the
- * update can refer to the ->curr item and we need to reflect this
- * movement in our normalized position.
+ * Normalize after update_curr(); which will also have moved
+ * min_vruntime if @se is the one holding it back. But before doing
+ * update_min_vruntime() again, which will discount @se's position and
+ * can move min_vruntime forward still more.
*/
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime;
@@ -3469,8 +3486,16 @@
/* return excess runtime on last dequeue */
return_cfs_rq_runtime(cfs_rq);
- update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq);
+
+ /*
+ * Now advance min_vruntime if @se was the entity holding it back,
+ * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+ * put back on, and if we advance min_vruntime, we'll be placed back
+ * further than we started -- ie. we'll be penalized.
+ */
+ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+ update_min_vruntime(cfs_rq);
}
/*
@@ -3523,25 +3548,25 @@
* a CPU. So account for the time it spent waiting on the
* runqueue.
*/
- if (schedstat_enabled())
- update_stats_wait_end(cfs_rq, se);
+ update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
update_load_avg(se, 1);
}
update_stats_curr_start(cfs_rq, se);
cfs_rq->curr = se;
-#ifdef CONFIG_SCHEDSTATS
+
/*
* Track our maximum slice length, if the CPU's load is at
* least twice that of our own weight (i.e. dont track it
* when there are only lesser-weight tasks around):
*/
if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
- se->statistics.slice_max = max(se->statistics.slice_max,
- se->sum_exec_runtime - se->prev_sum_exec_runtime);
+ schedstat_set(se->statistics.slice_max,
+ max((u64)schedstat_val(se->statistics.slice_max),
+ se->sum_exec_runtime - se->prev_sum_exec_runtime));
}
-#endif
+
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
@@ -3620,13 +3645,10 @@
/* throttle cfs_rqs exceeding runtime */
check_cfs_rq_runtime(cfs_rq);
- if (schedstat_enabled()) {
- check_spread(cfs_rq, prev);
- if (prev->on_rq)
- update_stats_wait_start(cfs_rq, prev);
- }
+ check_spread(cfs_rq, prev);
if (prev->on_rq) {
+ update_stats_wait_start(cfs_rq, prev);
/* Put 'current' back into the tree. */
__enqueue_entity(cfs_rq, prev);
/* in !on_rq case, update occurred at dequeue */
@@ -4456,9 +4478,9 @@
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- WARN_ON(task_rq(p) != rq);
+ SCHED_WARN_ON(task_rq(p) != rq);
- if (cfs_rq->nr_running > 1) {
+ if (rq->cfs.h_nr_running > 1) {
u64 slice = sched_slice(cfs_rq, se);
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
s64 delta = slice - ran;
@@ -4509,6 +4531,14 @@
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ /*
+ * If in_iowait is set, the code below may not trigger any cpufreq
+ * utilization updates, so do it here explicitly with the IOWAIT flag
+ * passed.
+ */
+ if (p->in_iowait)
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
for_each_sched_entity(se) {
if (se->on_rq)
break;
@@ -4605,6 +4635,11 @@
}
#ifdef CONFIG_SMP
+
+/* Working cpumask for: load_balance, load_balance_newidle. */
+DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
+DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+
#ifdef CONFIG_NO_HZ_COMMON
/*
* per rq 'load' arrray crap; XXX kill this.
@@ -5006,9 +5041,9 @@
* wl = S * s'_i; see (2)
*/
if (W > 0 && w < W)
- wl = (w * (long)tg->shares) / W;
+ wl = (w * (long)scale_load_down(tg->shares)) / W;
else
- wl = tg->shares;
+ wl = scale_load_down(tg->shares);
/*
* Per the above, wl is the new se->load.weight value; since
@@ -5091,18 +5126,18 @@
return 1;
}
-static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+ int prev_cpu, int sync)
{
s64 this_load, load;
s64 this_eff_load, prev_eff_load;
- int idx, this_cpu, prev_cpu;
+ int idx, this_cpu;
struct task_group *tg;
unsigned long weight;
int balanced;
idx = sd->wake_idx;
this_cpu = smp_processor_id();
- prev_cpu = task_cpu(p);
load = source_load(prev_cpu, idx);
this_load = target_load(this_cpu, idx);
@@ -5146,13 +5181,13 @@
balanced = this_eff_load <= prev_eff_load;
- schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+ schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
if (!balanced)
return 0;
- schedstat_inc(sd, ttwu_move_affine);
- schedstat_inc(p, se.statistics.nr_wakeups_affine);
+ schedstat_inc(sd->ttwu_move_affine);
+ schedstat_inc(p->se.statistics.nr_wakeups_affine);
return 1;
}
@@ -5228,6 +5263,10 @@
int shallowest_idle_cpu = -1;
int i;
+ /* Check if we have any choice: */
+ if (group->group_weight == 1)
+ return cpumask_first(sched_group_cpus(group));
+
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
if (idle_cpu(i)) {
@@ -5265,64 +5304,237 @@
}
/*
- * Try and locate an idle CPU in the sched_domain.
+ * Implement a for_each_cpu() variant that starts the scan at a given cpu
+ * (@start), and wraps around.
+ *
+ * This is used to scan for idle CPUs; such that not all CPUs looking for an
+ * idle CPU find the same CPU. The down-side is that tasks tend to cycle
+ * through the LLC domain.
+ *
+ * Especially tbench is found sensitive to this.
*/
-static int select_idle_sibling(struct task_struct *p, int target)
+
+static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
+{
+ int next;
+
+again:
+ next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
+
+ if (*wrapped) {
+ if (next >= start)
+ return nr_cpumask_bits;
+ } else {
+ if (next >= nr_cpumask_bits) {
+ *wrapped = 1;
+ n = -1;
+ goto again;
+ }
+ }
+
+ return next;
+}
+
+#define for_each_cpu_wrap(cpu, mask, start, wrap) \
+ for ((wrap) = 0, (cpu) = (start)-1; \
+ (cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)), \
+ (cpu) < nr_cpumask_bits; )
+
+#ifdef CONFIG_SCHED_SMT
+
+static inline void set_idle_cores(int cpu, int val)
+{
+ struct sched_domain_shared *sds;
+
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds)
+ WRITE_ONCE(sds->has_idle_cores, val);
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+ struct sched_domain_shared *sds;
+
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds)
+ return READ_ONCE(sds->has_idle_cores);
+
+ return def;
+}
+
+/*
+ * Scans the local SMT mask to see if the entire core is idle, and records this
+ * information in sd_llc_shared->has_idle_cores.
+ *
+ * Since SMT siblings share all cache levels, inspecting this limited remote
+ * state should be fairly cheap.
+ */
+void __update_idle_core(struct rq *rq)
+{
+ int core = cpu_of(rq);
+ int cpu;
+
+ rcu_read_lock();
+ if (test_idle_cores(core, true))
+ goto unlock;
+
+ for_each_cpu(cpu, cpu_smt_mask(core)) {
+ if (cpu == core)
+ continue;
+
+ if (!idle_cpu(cpu))
+ goto unlock;
+ }
+
+ set_idle_cores(core, 1);
+unlock:
+ rcu_read_unlock();
+}
+
+/*
+ * Scan the entire LLC domain for idle cores; this dynamically switches off if
+ * there are no idle cores left in the system; tracked through
+ * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ */
+static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ int core, cpu, wrap;
+
+ if (!static_branch_likely(&sched_smt_present))
+ return -1;
+
+ if (!test_idle_cores(target, false))
+ return -1;
+
+ cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p));
+
+ for_each_cpu_wrap(core, cpus, target, wrap) {
+ bool idle = true;
+
+ for_each_cpu(cpu, cpu_smt_mask(core)) {
+ cpumask_clear_cpu(cpu, cpus);
+ if (!idle_cpu(cpu))
+ idle = false;
+ }
+
+ if (idle)
+ return core;
+ }
+
+ /*
+ * Failed to find an idle core; stop looking for one.
+ */
+ set_idle_cores(target, 0);
+
+ return -1;
+}
+
+/*
+ * Scan the local SMT mask for idle CPUs.
+ */
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ int cpu;
+
+ if (!static_branch_likely(&sched_smt_present))
+ return -1;
+
+ for_each_cpu(cpu, cpu_smt_mask(target)) {
+ if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+ continue;
+ if (idle_cpu(cpu))
+ return cpu;
+ }
+
+ return -1;
+}
+
+#else /* CONFIG_SCHED_SMT */
+
+static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ return -1;
+}
+
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ return -1;
+}
+
+#endif /* CONFIG_SCHED_SMT */
+
+/*
+ * Scan the LLC domain for idle CPUs; this is dynamically regulated by
+ * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
+ * average idle time for this rq (as found in rq->avg_idle).
+ */
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+{
+ struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+ u64 avg_idle = this_rq()->avg_idle;
+ u64 avg_cost = this_sd->avg_scan_cost;
+ u64 time, cost;
+ s64 delta;
+ int cpu, wrap;
+
+ /*
+ * Due to large variance we need a large fuzz factor; hackbench in
+ * particularly is sensitive here.
+ */
+ if ((avg_idle / 512) < avg_cost)
+ return -1;
+
+ time = local_clock();
+
+ for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+ if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+ continue;
+ if (idle_cpu(cpu))
+ break;
+ }
+
+ time = local_clock() - time;
+ cost = this_sd->avg_scan_cost;
+ delta = (s64)(time - cost) / 8;
+ this_sd->avg_scan_cost += delta;
+
+ return cpu;
+}
+
+/*
+ * Try and locate an idle core/thread in the LLC cache domain.
+ */
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
- struct sched_group *sg;
- int i = task_cpu(p);
+ int i;
if (idle_cpu(target))
return target;
/*
- * If the prevous cpu is cache affine and idle, don't be stupid.
+ * If the previous cpu is cache affine and idle, don't be stupid.
*/
- if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+ if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+ return prev;
+
+ sd = rcu_dereference(per_cpu(sd_llc, target));
+ if (!sd)
+ return target;
+
+ i = select_idle_core(p, sd, target);
+ if ((unsigned)i < nr_cpumask_bits)
return i;
- /*
- * Otherwise, iterate the domains and find an eligible idle cpu.
- *
- * A completely idle sched group at higher domains is more
- * desirable than an idle group at a lower level, because lower
- * domains have smaller groups and usually share hardware
- * resources which causes tasks to contend on them, e.g. x86
- * hyperthread siblings in the lowest domain (SMT) can contend
- * on the shared cpu pipeline.
- *
- * However, while we prefer idle groups at higher domains
- * finding an idle cpu at the lowest domain is still better than
- * returning 'target', which we've already established, isn't
- * idle.
- */
- sd = rcu_dereference(per_cpu(sd_llc, target));
- for_each_lower_domain(sd) {
- sg = sd->groups;
- do {
- if (!cpumask_intersects(sched_group_cpus(sg),
- tsk_cpus_allowed(p)))
- goto next;
+ i = select_idle_cpu(p, sd, target);
+ if ((unsigned)i < nr_cpumask_bits)
+ return i;
- /* Ensure the entire group is idle */
- for_each_cpu(i, sched_group_cpus(sg)) {
- if (i == target || !idle_cpu(i))
- goto next;
- }
+ i = select_idle_smt(p, sd, target);
+ if ((unsigned)i < nr_cpumask_bits)
+ return i;
- /*
- * It doesn't matter which cpu we pick, the
- * whole group is idle.
- */
- target = cpumask_first_and(sched_group_cpus(sg),
- tsk_cpus_allowed(p));
- goto done;
-next:
- sg = sg->next;
- } while (sg != sd->groups);
- }
-done:
return target;
}
@@ -5360,6 +5572,32 @@
return (util >= capacity) ? capacity : util;
}
+static inline int task_util(struct task_struct *p)
+{
+ return p->se.avg.util_avg;
+}
+
+/*
+ * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+ * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
+ *
+ * In that case WAKE_AFFINE doesn't make sense and we'll let
+ * BALANCE_WAKE sort things out.
+ */
+static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+{
+ long min_cap, max_cap;
+
+ min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+ max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+
+ /* Minimum capacity is close to max, no need to abort wake_affine */
+ if (max_cap - min_cap < max_cap >> 3)
+ return 0;
+
+ return min_cap * 1024 < task_util(p) * capacity_margin;
+}
+
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5383,7 +5621,8 @@
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
- want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+ want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
+ && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
}
rcu_read_lock();
@@ -5409,13 +5648,13 @@
if (affine_sd) {
sd = NULL; /* Prefer wake_affine over balance flags */
- if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+ if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
new_cpu = cpu;
}
if (!sd) {
if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
- new_cpu = select_idle_sibling(p, new_cpu);
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
} else while (sd) {
struct sched_group *group;
@@ -5939,7 +6178,7 @@
*
* The adjacency matrix of the resulting graph is given by:
*
- * log_2 n
+ * log_2 n
* A_i,j = \Union (i % 2^k == 0) && i / 2^(k+1) == j / 2^(k+1) (6)
* k = 0
*
@@ -5985,7 +6224,7 @@
*
* [XXX write more on how we solve this.. _after_ merging pjt's patches that
* rewrite all of this once again.]
- */
+ */
static unsigned long __read_mostly max_load_balance_interval = HZ/10;
@@ -6133,7 +6372,7 @@
if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
int cpu;
- schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+ schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
env->flags |= LBF_SOME_PINNED;
@@ -6164,7 +6403,7 @@
env->flags &= ~LBF_ALL_PINNED;
if (task_running(env->src_rq, p)) {
- schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+ schedstat_inc(p->se.statistics.nr_failed_migrations_running);
return 0;
}
@@ -6181,13 +6420,13 @@
if (tsk_cache_hot <= 0 ||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
if (tsk_cache_hot == 1) {
- schedstat_inc(env->sd, lb_hot_gained[env->idle]);
- schedstat_inc(p, se.statistics.nr_forced_migrations);
+ schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+ schedstat_inc(p->se.statistics.nr_forced_migrations);
}
return 1;
}
- schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+ schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
return 0;
}
@@ -6227,7 +6466,7 @@
* so we can safely collect stats here rather than
* inside detach_tasks().
*/
- schedstat_inc(env->sd, lb_gained[env->idle]);
+ schedstat_inc(env->sd->lb_gained[env->idle]);
return p;
}
return NULL;
@@ -6319,7 +6558,7 @@
* so we can safely collect detach_one_task() stats here rather
* than inside detach_one_task().
*/
- schedstat_add(env->sd, lb_gained[env->idle], detached);
+ schedstat_add(env->sd->lb_gained[env->idle], detached);
return detached;
}
@@ -6647,7 +6886,7 @@
/*
* !SD_OVERLAP domains can assume that child groups
* span the current group.
- */
+ */
group = child->groups;
do {
@@ -7147,7 +7386,7 @@
load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
if (load_above_capacity > busiest->group_capacity) {
load_above_capacity -= busiest->group_capacity;
- load_above_capacity *= NICE_0_LOAD;
+ load_above_capacity *= scale_load_down(NICE_0_LOAD);
load_above_capacity /= busiest->group_capacity;
} else
load_above_capacity = ~0UL;
@@ -7354,9 +7593,6 @@
*/
#define MAX_PINNED_INTERVAL 512
-/* Working cpumask for load_balance and load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
-
static int need_active_balance(struct lb_env *env)
{
struct sched_domain *sd = env->sd;
@@ -7460,7 +7696,7 @@
cpumask_copy(cpus, cpu_active_mask);
- schedstat_inc(sd, lb_count[idle]);
+ schedstat_inc(sd->lb_count[idle]);
redo:
if (!should_we_balance(&env)) {
@@ -7470,19 +7706,19 @@
group = find_busiest_group(&env);
if (!group) {
- schedstat_inc(sd, lb_nobusyg[idle]);
+ schedstat_inc(sd->lb_nobusyg[idle]);
goto out_balanced;
}
busiest = find_busiest_queue(&env, group);
if (!busiest) {
- schedstat_inc(sd, lb_nobusyq[idle]);
+ schedstat_inc(sd->lb_nobusyq[idle]);
goto out_balanced;
}
BUG_ON(busiest == env.dst_rq);
- schedstat_add(sd, lb_imbalance[idle], env.imbalance);
+ schedstat_add(sd->lb_imbalance[idle], env.imbalance);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
@@ -7589,7 +7825,7 @@
}
if (!ld_moved) {
- schedstat_inc(sd, lb_failed[idle]);
+ schedstat_inc(sd->lb_failed[idle]);
/*
* Increment the failure counter only on periodic balance.
* We do not want newidle balance, which can be very
@@ -7672,7 +7908,7 @@
* we can't migrate them. Let the imbalance flag set so parent level
* can try to migrate them.
*/
- schedstat_inc(sd, lb_balanced[idle]);
+ schedstat_inc(sd->lb_balanced[idle]);
sd->nr_balance_failed = 0;
@@ -7704,11 +7940,12 @@
}
static inline void
-update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
{
unsigned long interval, next;
- interval = get_sd_balance_interval(sd, cpu_busy);
+ /* used by idle balance, so cpu_busy = 0 */
+ interval = get_sd_balance_interval(sd, 0);
next = sd->last_balance + interval;
if (time_after(*next_balance, next))
@@ -7738,7 +7975,7 @@
rcu_read_lock();
sd = rcu_dereference_check_sched_domain(this_rq->sd);
if (sd)
- update_next_balance(sd, 0, &next_balance);
+ update_next_balance(sd, &next_balance);
rcu_read_unlock();
goto out;
@@ -7756,7 +7993,7 @@
continue;
if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
- update_next_balance(sd, 0, &next_balance);
+ update_next_balance(sd, &next_balance);
break;
}
@@ -7774,7 +8011,7 @@
curr_cost += domain_cost;
}
- update_next_balance(sd, 0, &next_balance);
+ update_next_balance(sd, &next_balance);
/*
* Stop searching for tasks to pull if there are
@@ -7864,15 +8101,15 @@
.idle = CPU_IDLE,
};
- schedstat_inc(sd, alb_count);
+ schedstat_inc(sd->alb_count);
p = detach_one_task(&env);
if (p) {
- schedstat_inc(sd, alb_pushed);
+ schedstat_inc(sd->alb_pushed);
/* Active balancing done, reset the failure counter. */
sd->nr_balance_failed = 0;
} else {
- schedstat_inc(sd, alb_failed);
+ schedstat_inc(sd->alb_failed);
}
}
rcu_read_unlock();
@@ -7964,13 +8201,13 @@
int cpu = smp_processor_id();
rcu_read_lock();
- sd = rcu_dereference(per_cpu(sd_busy, cpu));
+ sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd || !sd->nohz_idle)
goto unlock;
sd->nohz_idle = 0;
- atomic_inc(&sd->groups->sgc->nr_busy_cpus);
+ atomic_inc(&sd->shared->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
@@ -7981,13 +8218,13 @@
int cpu = smp_processor_id();
rcu_read_lock();
- sd = rcu_dereference(per_cpu(sd_busy, cpu));
+ sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd || sd->nohz_idle)
goto unlock;
sd->nohz_idle = 1;
- atomic_dec(&sd->groups->sgc->nr_busy_cpus);
+ atomic_dec(&sd->shared->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
@@ -8214,8 +8451,8 @@
static inline bool nohz_kick_needed(struct rq *rq)
{
unsigned long now = jiffies;
+ struct sched_domain_shared *sds;
struct sched_domain *sd;
- struct sched_group_capacity *sgc;
int nr_busy, cpu = rq->cpu;
bool kick = false;
@@ -8243,11 +8480,13 @@
return true;
rcu_read_lock();
- sd = rcu_dereference(per_cpu(sd_busy, cpu));
- if (sd) {
- sgc = sd->groups->sgc;
- nr_busy = atomic_read(&sgc->nr_busy_cpus);
-
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds) {
+ /*
+ * XXX: write a coherent comment on why we do this.
+ * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+ */
+ nr_busy = atomic_read(&sds->nr_busy_cpus);
if (nr_busy > 1) {
kick = true;
goto unlock;
@@ -8441,7 +8680,6 @@
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 now = cfs_rq_clock_task(cfs_rq);
- int tg_update;
if (!vruntime_normalized(p)) {
/*
@@ -8453,10 +8691,9 @@
}
/* Catch up with the cfs_rq and remove our load when we leave */
- tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+ update_cfs_rq_load_avg(now, cfs_rq, false);
detach_entity_load_avg(cfs_rq, se);
- if (tg_update)
- update_tg_load_avg(cfs_rq, false);
+ update_tg_load_avg(cfs_rq, false);
}
static void attach_task_cfs_rq(struct task_struct *p)
@@ -8464,7 +8701,6 @@
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 now = cfs_rq_clock_task(cfs_rq);
- int tg_update;
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
@@ -8475,10 +8711,9 @@
#endif
/* Synchronize task with its cfs_rq */
- tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+ update_cfs_rq_load_avg(now, cfs_rq, false);
attach_entity_load_avg(cfs_rq, se);
- if (tg_update)
- update_tg_load_avg(cfs_rq, false);
+ update_tg_load_avg(cfs_rq, false);
if (!vruntime_normalized(p))
se->vruntime += cfs_rq->min_vruntime;
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 2ce5458..5405d3f 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -27,8 +27,8 @@
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
put_prev_task(rq, prev);
-
- schedstat_inc(rq, sched_goidle);
+ update_idle_core(rq);
+ schedstat_inc(rq->sched_goidle);
return rq->idle;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b7..2516b8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@
if (unlikely((s64)delta_exec <= 0))
return;
- /* Kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc51..055f935 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
+#include <linux/u64_stats_sync.h>
#include <linux/sched/deadline.h>
#include <linux/binfmts.h>
#include <linux/mutex.h>
@@ -15,6 +16,12 @@
#include "cpudeadline.h"
#include "cpuacct.h"
+#ifdef CONFIG_SCHED_DEBUG
+#define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
+#else
+#define SCHED_WARN_ON(x) ((void)(x))
+#endif
+
struct rq;
struct cpuidle_state;
@@ -565,6 +572,8 @@
*/
cpumask_var_t rto_mask;
struct cpupri cpupri;
+
+ unsigned long max_cpu_capacity;
};
extern struct root_domain def_root_domain;
@@ -597,7 +606,6 @@
#ifdef CONFIG_SMP
unsigned long last_load_update_tick;
#endif /* CONFIG_SMP */
- u64 nohz_stamp;
unsigned long nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
@@ -723,6 +731,23 @@
#endif
}
+
+#ifdef CONFIG_SCHED_SMT
+
+extern struct static_key_false sched_smt_present;
+
+extern void __update_idle_core(struct rq *rq);
+
+static inline void update_idle_core(struct rq *rq)
+{
+ if (static_branch_unlikely(&sched_smt_present))
+ __update_idle_core(rq);
+}
+
+#else
+static inline void update_idle_core(struct rq *rq) { }
+#endif
+
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
@@ -857,8 +882,8 @@
DECLARE_PER_CPU(struct sched_domain *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
-DECLARE_PER_CPU(struct sched_domain *, sd_busy);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);
struct sched_group_capacity {
@@ -870,10 +895,6 @@
unsigned int capacity;
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
- /*
- * Number of busy cpus in this group.
- */
- atomic_t nr_busy_cpus;
unsigned long cpumask[0]; /* iteration mask */
};
@@ -1000,7 +1021,11 @@
* per-task data have been completed by this moment.
*/
smp_wmb();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ p->cpu = cpu;
+#else
task_thread_info(p)->cpu = cpu;
+#endif
p->wake_cpu = cpu;
#endif
}
@@ -1260,6 +1285,11 @@
prev->sched_class->put_prev_task(rq, prev);
}
+static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
+{
+ curr->sched_class->set_curr_task(rq);
+}
+
#define sched_class_highest (&stop_sched_class)
#define for_each_class(class) \
for (class = sched_class_highest; class; class = class->next)
@@ -1290,7 +1320,7 @@
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
- WARN_ON(!rcu_read_lock_held());
+ SCHED_WARN_ON(!rcu_read_lock_held());
return rq->idle_state;
}
#else
@@ -1710,52 +1740,28 @@
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+struct irqtime {
+ u64 hardirq_time;
+ u64 softirq_time;
+ u64 irq_start_time;
+ struct u64_stats_sync sync;
+};
-DECLARE_PER_CPU(u64, cpu_hardirq_time);
-DECLARE_PER_CPU(u64, cpu_softirq_time);
-
-#ifndef CONFIG_64BIT
-DECLARE_PER_CPU(seqcount_t, irq_time_seq);
-
-static inline void irq_time_write_begin(void)
-{
- __this_cpu_inc(irq_time_seq.sequence);
- smp_wmb();
-}
-
-static inline void irq_time_write_end(void)
-{
- smp_wmb();
- __this_cpu_inc(irq_time_seq.sequence);
-}
+DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
static inline u64 irq_time_read(int cpu)
{
- u64 irq_time;
- unsigned seq;
+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+ unsigned int seq;
+ u64 total;
do {
- seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
- irq_time = per_cpu(cpu_softirq_time, cpu) +
- per_cpu(cpu_hardirq_time, cpu);
- } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+ seq = __u64_stats_fetch_begin(&irqtime->sync);
+ total = irqtime->softirq_time + irqtime->hardirq_time;
+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
- return irq_time;
+ return total;
}
-#else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
-{
-}
-
-static inline void irq_time_write_end(void)
-{
-}
-
-static inline u64 irq_time_read(int cpu)
-{
- return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
-}
-#endif /* CONFIG_64BIT */
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
@@ -1763,27 +1769,13 @@
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
*
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
*
* It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
- struct update_util_data *data;
-
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
- if (data)
- data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1789,23 @@
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
- cpufreq_update_util(time, ULONG_MAX, 0);
+ struct update_util_data *data;
+
+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ if (data)
+ data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
+{
+ if (cpu_of(rq) == smp_processor_id())
+ cpufreq_update_util(rq, flags);
}
#else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 78955cb..34659a8 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,11 +29,12 @@
if (rq)
rq->rq_sched_info.run_delay += delta;
}
-# define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
-# define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
-# define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
-# define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
-# define schedstat_val(rq, field) ((schedstat_enabled()) ? (rq)->field : 0)
+#define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
+#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
+#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
+#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
+#define schedstat_val(var) (var)
+#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
#else /* !CONFIG_SCHEDSTATS */
static inline void
@@ -45,12 +46,13 @@
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{}
-# define schedstat_enabled() 0
-# define schedstat_inc(rq, field) do { } while (0)
-# define schedstat_add(rq, field, amt) do { } while (0)
-# define schedstat_set(var, val) do { } while (0)
-# define schedstat_val(rq, field) 0
-#endif
+#define schedstat_enabled() 0
+#define schedstat_inc(var) do { } while (0)
+#define schedstat_add(var, amt) do { } while (0)
+#define schedstat_set(var, val) do { } while (0)
+#define schedstat_val(var) 0
+#define schedstat_val_or_zero(var) 0
+#endif /* CONFIG_SCHEDSTATS */
#ifdef CONFIG_SCHED_INFO
static inline void sched_info_reset_dequeued(struct task_struct *t)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6..4f70535 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -196,27 +196,48 @@
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+void init_wait_entry(wait_queue_t *wait, int flags)
+{
+ wait->flags = flags;
+ wait->private = current;
+ wait->func = autoremove_wake_function;
+ INIT_LIST_HEAD(&wait->task_list);
+}
+EXPORT_SYMBOL(init_wait_entry);
+
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
-
- if (signal_pending_state(state, current))
- return -ERESTARTSYS;
-
- wait->private = current;
- wait->func = autoremove_wake_function;
+ long ret = 0;
spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list)) {
- if (wait->flags & WQ_FLAG_EXCLUSIVE)
- __add_wait_queue_tail(q, wait);
- else
- __add_wait_queue(q, wait);
+ if (unlikely(signal_pending_state(state, current))) {
+ /*
+ * Exclusive waiter must not fail if it was selected by wakeup,
+ * it should "consume" the condition we were waiting for.
+ *
+ * The caller will recheck the condition and return success if
+ * we were already woken up, we can not miss the event because
+ * wakeup locks/unlocks the same q->lock.
+ *
+ * But we need to ensure that set-condition + wakeup after that
+ * can't see us, it should wake up another exclusive waiter if
+ * we fail.
+ */
+ list_del_init(&wait->task_list);
+ ret = -ERESTARTSYS;
+ } else {
+ if (list_empty(&wait->task_list)) {
+ if (wait->flags & WQ_FLAG_EXCLUSIVE)
+ __add_wait_queue_tail(q, wait);
+ else
+ __add_wait_queue(q, wait);
+ }
+ set_current_state(state);
}
- set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
@@ -255,39 +276,6 @@
}
EXPORT_SYMBOL(finish_wait);
-/**
- * abort_exclusive_wait - abort exclusive waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
- * @mode: runstate of the waiter to be woken
- * @key: key to identify a wait bit queue or %NULL
- *
- * Sets current thread back to running state and removes
- * the wait descriptor from the given waitqueue if still
- * queued.
- *
- * Wakes up the next waiter if the caller is concurrently
- * woken up through the queue.
- *
- * This prevents waiter starvation where an exclusive waiter
- * aborts and is woken up concurrently and no one wakes up
- * the next waiter.
- */
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
- unsigned int mode, void *key)
-{
- unsigned long flags;
-
- __set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&q->lock, flags);
- if (!list_empty(&wait->task_list))
- list_del_init(&wait->task_list);
- else if (waitqueue_active(q))
- __wake_up_locked_key(q, mode, key);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-EXPORT_SYMBOL(abort_exclusive_wait);
-
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);
@@ -425,20 +413,29 @@
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
wait_bit_action_f *action, unsigned mode)
{
- do {
- int ret;
+ int ret = 0;
+ for (;;) {
prepare_to_wait_exclusive(wq, &q->wait, mode);
- if (!test_bit(q->key.bit_nr, q->key.flags))
- continue;
- ret = action(&q->key, mode);
- if (!ret)
- continue;
- abort_exclusive_wait(wq, &q->wait, mode, &q->key);
- return ret;
- } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
- finish_wait(wq, &q->wait);
- return 0;
+ if (test_bit(q->key.bit_nr, q->key.flags)) {
+ ret = action(&q->key, mode);
+ /*
+ * See the comment in prepare_to_wait_event().
+ * finish_wait() does not necessarily takes wq->lock,
+ * but test_and_set_bit() implies mb() which pairs with
+ * smp_mb__after_atomic() before wake_up_page().
+ */
+ if (ret)
+ finish_wait(wq, &q->wait);
+ }
+ if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
+ if (!ret)
+ finish_wait(wq, &q->wait);
+ return 0;
+ } else if (ret) {
+ return ret;
+ }
+ }
}
EXPORT_SYMBOL(__wait_on_bit_lock);
diff --git a/kernel/signal.c b/kernel/signal.c
index af21afc..75761ac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3044,6 +3044,11 @@
}
EXPORT_SYMBOL(kernel_sigaction);
+void __weak sigaction_compat_abi(struct k_sigaction *act,
+ struct k_sigaction *oact)
+{
+}
+
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct task_struct *p = current, *t;
@@ -3059,6 +3064,8 @@
if (oact)
*oact = *k;
+ sigaction_compat_abi(act, oact);
+
if (act) {
sigdelsetmask(&act->sa.sa_mask,
sigmask(SIGKILL) | sigmask(SIGSTOP));
diff --git a/kernel/smp.c b/kernel/smp.c
index 3aa642d..bba3b20 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -14,6 +14,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/hypervisor.h>
#include "smpboot.h"
@@ -724,3 +725,54 @@
preempt_enable();
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
+
+/**
+ * smp_call_on_cpu - Call a function on a specific cpu
+ *
+ * Used to call a function on a specific cpu and wait for it to return.
+ * Optionally make sure the call is done on a specified physical cpu via vcpu
+ * pinning in order to support virtualized environments.
+ */
+struct smp_call_on_cpu_struct {
+ struct work_struct work;
+ struct completion done;
+ int (*func)(void *);
+ void *data;
+ int ret;
+ int cpu;
+};
+
+static void smp_call_on_cpu_callback(struct work_struct *work)
+{
+ struct smp_call_on_cpu_struct *sscs;
+
+ sscs = container_of(work, struct smp_call_on_cpu_struct, work);
+ if (sscs->cpu >= 0)
+ hypervisor_pin_vcpu(sscs->cpu);
+ sscs->ret = sscs->func(sscs->data);
+ if (sscs->cpu >= 0)
+ hypervisor_pin_vcpu(-1);
+
+ complete(&sscs->done);
+}
+
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
+{
+ struct smp_call_on_cpu_struct sscs = {
+ .done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
+ .func = func,
+ .data = par,
+ .cpu = phys ? cpu : -1,
+ };
+
+ INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
+
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO;
+
+ queue_work_on(cpu, system_wq, &sscs.work);
+ wait_for_completion(&sscs.done);
+
+ return sscs.ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_on_cpu);
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 13bc43d..fc0d8270 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -122,12 +122,12 @@
if (kthread_should_park()) {
__set_current_state(TASK_RUNNING);
- preempt_enable();
if (ht->park && td->status == HP_THREAD_ACTIVE) {
BUG_ON(td->cpu != smp_processor_id());
ht->park(td->cpu);
td->status = HP_THREAD_PARKED;
}
+ preempt_enable();
kthread_parkme();
/* We might have been woken for stop */
continue;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 17caf4b..6676264 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -78,6 +78,17 @@
}
/*
+ * If ksoftirqd is scheduled, we do not want to process pending softirqs
+ * right now. Let ksoftirqd handle this at its own rate, to get fairness.
+ */
+static bool ksoftirqd_running(void)
+{
+ struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+
+ return tsk && (tsk->state == TASK_RUNNING);
+}
+
+/*
* preempt_count and SOFTIRQ_OFFSET usage:
* - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
* softirq processing.
@@ -313,7 +324,7 @@
pending = local_softirq_pending();
- if (pending)
+ if (pending && !ksoftirqd_running())
do_softirq_own_stack();
local_irq_restore(flags);
@@ -340,6 +351,9 @@
static inline void invoke_softirq(void)
{
+ if (ksoftirqd_running())
+ return;
+
if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
@@ -700,7 +714,7 @@
BUG();
}
-static void takeover_tasklets(unsigned int cpu)
+static int takeover_tasklets(unsigned int cpu)
{
/* CPU is dead, so no lock needed. */
local_irq_disable();
@@ -723,27 +737,12 @@
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
+ return 0;
}
+#else
+#define takeover_tasklets NULL
#endif /* CONFIG_HOTPLUG_CPU */
-static int cpu_callback(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
-{
- switch (action) {
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- takeover_tasklets((unsigned long)hcpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
static struct smp_hotplug_thread softirq_threads = {
.store = &ksoftirqd,
.thread_should_run = ksoftirqd_should_run,
@@ -753,8 +752,8 @@
static __init int spawn_ksoftirqd(void)
{
- register_cpu_notifier(&cpu_nfb);
-
+ cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+ takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..ec9ab2f 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
-#include <linux/lglock.h>
#include <linux/nmi.h>
/*
@@ -47,13 +46,9 @@
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
+/* static data for stop_cpus */
+static DEFINE_MUTEX(stop_cpus_mutex);
+static bool stop_cpus_in_progress;
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
@@ -126,6 +121,11 @@
cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
return -ENOENT;
+ /*
+ * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
+ * cycle by doing a preemption:
+ */
+ cond_resched();
wait_for_completion(&done.completion);
return done.ret;
}
@@ -230,14 +230,26 @@
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
int err;
-
- lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
+retry:
spin_lock_irq(&stopper1->lock);
spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
err = -ENOENT;
if (!stopper1->enabled || !stopper2->enabled)
goto unlock;
+ /*
+ * Ensure that if we race with __stop_cpus() the stoppers won't get
+ * queued up in reverse order leading to system deadlock.
+ *
+ * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
+ * queued a work on cpu1 but not on cpu2, we hold both locks.
+ *
+ * It can be falsely true but it is safe to spin until it is cleared,
+ * queue_stop_cpus_work() does everything under preempt_disable().
+ */
+ err = -EDEADLK;
+ if (unlikely(stop_cpus_in_progress))
+ goto unlock;
err = 0;
__cpu_stop_queue_work(stopper1, work1);
@@ -245,8 +257,12 @@
unlock:
spin_unlock(&stopper2->lock);
spin_unlock_irq(&stopper1->lock);
- lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
+ if (unlikely(err == -EDEADLK)) {
+ while (stop_cpus_in_progress)
+ cpu_relax();
+ goto retry;
+ }
return err;
}
/**
@@ -316,9 +332,6 @@
return cpu_stop_queue_work(cpu, work_buf);
}
-/* static data for stop_cpus */
-static DEFINE_MUTEX(stop_cpus_mutex);
-
static bool queue_stop_cpus_work(const struct cpumask *cpumask,
cpu_stop_fn_t fn, void *arg,
struct cpu_stop_done *done)
@@ -332,7 +345,8 @@
* preempted by a stopper which might wait for other stoppers
* to enter @fn which can lead to deadlock.
*/
- lg_global_lock(&stop_cpus_lock);
+ preempt_disable();
+ stop_cpus_in_progress = true;
for_each_cpu(cpu, cpumask) {
work = &per_cpu(cpu_stopper.stop_work, cpu);
work->fn = fn;
@@ -341,7 +355,8 @@
if (cpu_stop_queue_work(cpu, work))
queued = true;
}
- lg_global_unlock(&stop_cpus_lock);
+ stop_cpus_in_progress = false;
+ preempt_enable();
return queued;
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 6a5a310..7e4fad7 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -600,9 +600,18 @@
*/
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
/* Override clocksource cannot be used. */
- pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
- cs->name);
- override_name[0] = 0;
+ if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
+ pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
+ cs->name);
+ override_name[0] = 0;
+ } else {
+ /*
+ * The override cannot be currently verified.
+ * Deferring to let the watchdog check.
+ */
+ pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
+ cs->name);
+ }
} else
/* Override clocksource can be used. */
best = cs;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 9ba7c82..bb5ec42 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -307,7 +307,7 @@
*/
ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
{
- ktime_t res = ktime_add(lhs, rhs);
+ ktime_t res = ktime_add_unsafe(lhs, rhs);
/*
* We use KTIME_SEC_MAX here, the maximum timeout which we can
@@ -703,7 +703,7 @@
static DECLARE_WORK(hrtimer_work, clock_was_set_work);
/*
- * Called from timekeeping and resume code to reprogramm the hrtimer
+ * Called from timekeeping and resume code to reprogram the hrtimer
* interrupt device on all cpus.
*/
void clock_was_set_delayed(void)
@@ -1241,7 +1241,7 @@
/*
* Note: We clear the running state after enqueue_hrtimer and
- * we do not reprogramm the event hardware. Happens either in
+ * we do not reprogram the event hardware. Happens either in
* hrtimer_start_range_ns() or in hrtimer_interrupt()
*
* Note: Because we dropped the cpu_base->lock above,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2ec7c00..3bcb61b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -186,10 +186,13 @@
return false;
}
-static bool can_stop_full_tick(struct tick_sched *ts)
+static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
{
WARN_ON_ONCE(!irqs_disabled());
+ if (unlikely(!cpu_online(cpu)))
+ return false;
+
if (check_tick_dependency(&tick_dep_mask))
return false;
@@ -843,7 +846,7 @@
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
- if (can_stop_full_tick(ts))
+ if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 667b933..bd62fb8 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -780,7 +780,7 @@
{
struct timespec64 res;
- set_normalized_timespec64(&res, lhs.tv_sec + rhs.tv_sec,
+ set_normalized_timespec64(&res, (timeu64_t) lhs.tv_sec + rhs.tv_sec,
lhs.tv_nsec + rhs.tv_nsec);
if (unlikely(res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)) {
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 107310a..ca9fb80 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -75,5 +75,7 @@
int bin = min(fls(t->tv_sec), NUM_BINS-1);
sleep_time_bin[bin]++;
+ pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec,
+ t->tv_nsec / NSEC_PER_MSEC);
}
diff --git a/kernel/torture.c b/kernel/torture.c
index 75961b3..0d887eb 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -43,6 +43,7 @@
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/trace_clock.h>
+#include <linux/ktime.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
@@ -446,9 +447,8 @@
* Variables for auto-shutdown. This allows "lights out" torture runs
* to be fully scripted.
*/
-static int shutdown_secs; /* desired test duration in seconds. */
static struct task_struct *shutdown_task;
-static unsigned long shutdown_time; /* jiffies to system shutdown. */
+static ktime_t shutdown_time; /* time to system shutdown. */
static void (*torture_shutdown_hook)(void);
/*
@@ -471,20 +471,20 @@
*/
static int torture_shutdown(void *arg)
{
- long delta;
- unsigned long jiffies_snap;
+ ktime_t ktime_snap;
VERBOSE_TOROUT_STRING("torture_shutdown task started");
- jiffies_snap = jiffies;
- while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
+ ktime_snap = ktime_get();
+ while (ktime_before(ktime_snap, shutdown_time) &&
!torture_must_stop()) {
- delta = shutdown_time - jiffies_snap;
if (verbose)
pr_alert("%s" TORTURE_FLAG
- "torture_shutdown task: %lu jiffies remaining\n",
- torture_type, delta);
- schedule_timeout_interruptible(delta);
- jiffies_snap = jiffies;
+ "torture_shutdown task: %llu ms remaining\n",
+ torture_type,
+ ktime_ms_delta(shutdown_time, ktime_snap));
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_hrtimeout(&shutdown_time, HRTIMER_MODE_ABS);
+ ktime_snap = ktime_get();
}
if (torture_must_stop()) {
torture_kthread_stopping("torture_shutdown");
@@ -511,10 +511,9 @@
{
int ret = 0;
- shutdown_secs = ssecs;
torture_shutdown_hook = cleanup;
- if (shutdown_secs > 0) {
- shutdown_time = jiffies + shutdown_secs * HZ;
+ if (ssecs > 0) {
+ shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0));
ret = torture_create_kthread(torture_shutdown, NULL,
shutdown_task);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f4b86e8..ba33267 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -24,11 +24,6 @@
help
See Documentation/trace/ftrace-design.txt
-config HAVE_FUNCTION_GRAPH_FP_TEST
- bool
- help
- See Documentation/trace/ftrace-design.txt
-
config HAVE_DYNAMIC_FTRACE
bool
help
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dade4c9..37824d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4123,6 +4123,30 @@
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
+#ifdef CONFIG_KPROBE_EVENT
+ " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+ " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
+#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+ "\t accepts: event-definitions (one definition per line)\n"
+ "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
+ "\t -:[<group>/]<event>\n"
+#ifdef CONFIG_KPROBE_EVENT
+ "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+ "\t place: <path>:<offset>\n"
+#endif
+ "\t args: <name>=fetcharg[:type]\n"
+ "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+ "\t $stack<index>, $stack, $retval, $comm\n"
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>\n"
+#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
" events/<system>/\t- Directory containing all trace events for <system>:\n"
@@ -5124,19 +5148,20 @@
struct trace_iterator *iter = filp->private_data;
ssize_t sret;
- /* return any leftover data */
- sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
- if (sret != -EBUSY)
- return sret;
-
- trace_seq_init(&iter->seq);
-
/*
* Avoid more than one consumer on a single file descriptor
* This is just a matter of traces coherency, the ring buffer itself
* is protected.
*/
mutex_lock(&iter->mutex);
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ goto out;
+
+ trace_seq_init(&iter->seq);
+
if (iter->trace->read) {
sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
if (sret)
@@ -6163,9 +6188,6 @@
return -EBUSY;
#endif
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
if (*ppos & (PAGE_SIZE - 1))
return -EINVAL;
@@ -6175,6 +6197,9 @@
len &= PAGE_MASK;
}
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
again:
trace_access_lock(iter->cpu_file);
entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@ -6232,19 +6257,21 @@
/* did we read anything? */
if (!spd.nr_pages) {
if (ret)
- return ret;
+ goto out;
+ ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
- return -EAGAIN;
+ goto out;
ret = wait_on_pipe(iter, true);
if (ret)
- return ret;
+ goto out;
goto again;
}
ret = splice_to_pipe(pipe, &spd);
+out:
splice_shrink_spd(&spd);
return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 7363ccf..0cbe38a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@
/* Add a function return address to the trace stack on thread info.*/
int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
- unsigned long frame_pointer)
+ unsigned long frame_pointer, unsigned long *retp)
{
unsigned long long calltime;
int index;
@@ -171,7 +171,12 @@
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = calltime;
current->ret_stack[index].subtime = 0;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
current->ret_stack[index].fp = frame_pointer;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+ current->ret_stack[index].retp = retp;
+#endif
*depth = current->curr_ret_stack;
return 0;
@@ -204,7 +209,7 @@
return;
}
-#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
/*
* The arch may choose to record the frame pointer used
* and check it here to make sure that it is what we expect it
@@ -279,6 +284,64 @@
return ret;
}
+/**
+ * ftrace_graph_ret_addr - convert a potentially modified stack return address
+ * to its original value
+ *
+ * This function can be called by stack unwinding code to convert a found stack
+ * return address ('ret') to its original value, in case the function graph
+ * tracer has modified it to be 'return_to_handler'. If the address hasn't
+ * been modified, the unchanged value of 'ret' is returned.
+ *
+ * 'idx' is a state variable which should be initialized by the caller to zero
+ * before the first call.
+ *
+ * 'retp' is a pointer to the return address on the stack. It's ignored if
+ * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int index = task->curr_ret_stack;
+ int i;
+
+ if (ret != (unsigned long)return_to_handler)
+ return ret;
+
+ if (index < -1)
+ index += FTRACE_NOTRACE_DEPTH;
+
+ if (index < 0)
+ return ret;
+
+ for (i = 0; i <= index; i++)
+ if (task->ret_stack[i].retp == retp)
+ return task->ret_stack[i].ret;
+
+ return ret;
+}
+#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int task_idx;
+
+ if (ret != (unsigned long)return_to_handler)
+ return ret;
+
+ task_idx = task->curr_ret_stack;
+
+ if (!task->ret_stack || task_idx < *idx)
+ return ret;
+
+ task_idx -= *idx;
+ (*idx)++;
+
+ return task->ret_stack[task_idx].ret;
+}
+#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+
int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned long flags,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9aedb0b..eb6c9f1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -253,6 +253,10 @@
ASSIGN_FETCH_TYPE(s16, u16, 1),
ASSIGN_FETCH_TYPE(s32, u32, 1),
ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
ASSIGN_FETCH_TYPE_END
};
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 74e80a5..8c0553d 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -36,24 +36,28 @@
};
/* Printing in basic type function template */
-#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
-int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
+#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \
+int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \
void *data, void *ent) \
{ \
trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
return !trace_seq_has_overflowed(s); \
} \
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
+const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname));
-DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "%Lu")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s8, s8, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s16, s16, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s32, s32, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s64, s64, "%Ld")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x8, u8, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
/* Print type function for string type */
int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 45400ca..0c0ae54 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -149,6 +149,11 @@
DECLARE_BASIC_PRINT_TYPE_FUNC(s16);
DECLARE_BASIC_PRINT_TYPE_FUNC(s32);
DECLARE_BASIC_PRINT_TYPE_FUNC(s64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x8);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x16);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x32);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
+
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
@@ -203,7 +208,7 @@
DEFINE_FETCH_##method(u64)
/* Default (unsigned long) fetch type */
-#define __DEFAULT_FETCH_TYPE(t) u##t
+#define __DEFAULT_FETCH_TYPE(t) x##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
@@ -234,6 +239,10 @@
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+/* If ptype is an alias of atype, use this macro (show atype in format) */
+#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \
+ __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype)
+
#define ASSIGN_FETCH_TYPE_END {}
#define FETCH_TYPE_STRING 0
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c534854..7a68732 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -211,6 +211,10 @@
ASSIGN_FETCH_TYPE(s16, u16, 1),
ASSIGN_FETCH_TYPE(s32, u32, 1),
ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
ASSIGN_FETCH_TYPE_END
};
diff --git a/kernel/up.c b/kernel/up.c
index 1760bf3..ee81ac9 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/smp.h>
+#include <linux/hypervisor.h>
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
@@ -82,3 +83,20 @@
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond);
+
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
+{
+ int ret;
+
+ if (cpu != 0)
+ return -ENXIO;
+
+ if (phys)
+ hypervisor_pin_vcpu(0);
+ ret = func(par);
+ if (phys)
+ hypervisor_pin_vcpu(-1);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_on_cpu);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e2cca5..cab7405 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -821,7 +821,7 @@
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
- uninterruptible "D" state indefinitiley.
+ uninterruptible "D" state indefinitely.
When a hung task is detected, the kernel will print the
current stack trace (which you should report), but the
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
index 707ca24..0e2c9a1 100644
--- a/lib/cpu-notifier-error-inject.c
+++ b/lib/cpu-notifier-error-inject.c
@@ -8,16 +8,47 @@
module_param(priority, int, 0);
MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+#define UP_PREPARE 0
+#define UP_PREPARE_FROZEN 0
+#define DOWN_PREPARE 0
+#define DOWN_PREPARE_FROZEN 0
+
static struct notifier_err_inject cpu_notifier_err_inject = {
.actions = {
- { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE_FROZEN) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE_FROZEN) },
+ { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
+ { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
+ { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
+ { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
{}
}
};
+static int notf_err_handle(struct notifier_err_inject_action *action)
+{
+ int ret;
+
+ ret = action->error;
+ if (ret)
+ pr_info("Injecting error (%d) to %s\n", ret, action->name);
+ return ret;
+}
+
+static int notf_err_inj_up_prepare(unsigned int cpu)
+{
+ if (!cpuhp_tasks_frozen)
+ return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
+ else
+ return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
+}
+
+static int notf_err_inj_dead(unsigned int cpu)
+{
+ if (!cpuhp_tasks_frozen)
+ return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
+ else
+ return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
+}
+
static struct dentry *dir;
static int err_inject_init(void)
@@ -29,7 +60,10 @@
if (IS_ERR(dir))
return PTR_ERR(dir);
- err = register_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+ err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
+ "cpu-err-notif:prepare",
+ notf_err_inj_up_prepare,
+ notf_err_inj_dead);
if (err)
debugfs_remove_recursive(dir);
@@ -38,7 +72,7 @@
static void err_inject_exit(void)
{
- unregister_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+ cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
debugfs_remove_recursive(dir);
}
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fcfa193..06f02f6 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/export.h>
@@ -1164,11 +1165,32 @@
put_hash_bucket(bucket, &flags);
}
-static void check_for_stack(struct device *dev, void *addr)
+static void check_for_stack(struct device *dev,
+ struct page *page, size_t offset)
{
- if (object_is_on_stack(addr))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from "
- "stack [addr=%p]\n", addr);
+ void *addr;
+ struct vm_struct *stack_vm_area = task_stack_vm_area(current);
+
+ if (!stack_vm_area) {
+ /* Stack is direct-mapped. */
+ if (PageHighMem(page))
+ return;
+ addr = page_address(page) + offset;
+ if (object_is_on_stack(addr))
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+ } else {
+ /* Stack is vmalloced. */
+ int i;
+
+ for (i = 0; i < stack_vm_area->nr_pages; i++) {
+ if (page != stack_vm_area->pages[i])
+ continue;
+
+ addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+ break;
+ }
+ }
}
static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1291,10 +1313,11 @@
if (map_single)
entry->type = dma_debug_single;
+ check_for_stack(dev, page, offset);
+
if (!PageHighMem(page)) {
void *addr = page_address(page) + offset;
- check_for_stack(dev, addr);
check_for_illegal_area(dev, addr, size);
}
@@ -1386,8 +1409,9 @@
entry->sg_call_ents = nents;
entry->sg_mapped_ents = mapped_ents;
+ check_for_stack(dev, sg_page(s), s->offset);
+
if (!PageHighMem(sg_page(s))) {
- check_for_stack(dev, sg_virt(s));
check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
}
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 836f7db..2be5569 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -184,30 +184,21 @@
}
EXPORT_SYMBOL(irq_poll_init);
-static int irq_poll_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int irq_poll_cpu_dead(unsigned int cpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+ this_cpu_ptr(&blk_cpu_iopoll));
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+ local_irq_enable();
- local_irq_disable();
- list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
- this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
- local_irq_enable();
- }
-
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block irq_poll_cpu_notifier = {
- .notifier_call = irq_poll_cpu_notify,
-};
-
static __init int irq_poll_setup(void)
{
int i;
@@ -216,7 +207,8 @@
INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
- register_hotcpu_notifier(&irq_poll_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_IRQ_POLL_DEAD, "irq_poll:dead", NULL,
+ irq_poll_cpu_dead);
return 0;
}
subsys_initcall(irq_poll_setup);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1b7bf73..91f0727 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -105,10 +105,10 @@
#ifdef CONFIG_RADIX_TREE_MULTIORDER
if (radix_tree_is_internal_node(entry)) {
- unsigned long siboff = get_slot_offset(parent, entry);
- if (siboff < RADIX_TREE_MAP_SIZE) {
- offset = siboff;
- entry = rcu_dereference_raw(parent->slots[offset]);
+ if (is_sibling_entry(parent, entry)) {
+ void **sibentry = (void **) entry_to_node(entry);
+ offset = get_slot_offset(parent, sibentry);
+ entry = rcu_dereference_raw(*sibentry);
}
}
#endif
diff --git a/lib/syscall.c b/lib/syscall.c
index e30e039..63239e0 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -7,9 +7,19 @@
unsigned long args[6], unsigned int maxargs,
unsigned long *sp, unsigned long *pc)
{
- struct pt_regs *regs = task_pt_regs(target);
- if (unlikely(!regs))
+ struct pt_regs *regs;
+
+ if (!try_get_task_stack(target)) {
+ /* Task has no stack, so the task isn't in a syscall. */
+ *callno = -1;
+ return 0;
+ }
+
+ regs = task_pt_regs(target);
+ if (unlikely(!regs)) {
+ put_task_stack(target);
return -EAGAIN;
+ }
*sp = user_stack_pointer(regs);
*pc = instruction_pointer(regs);
@@ -18,6 +28,7 @@
if (*callno != -1L && maxargs > 0)
syscall_get_arguments(target, regs, 0, maxargs, args);
+ put_task_stack(target);
return 0;
}
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index f0b323a..ae8d249 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -56,7 +56,7 @@
unsigned long i;
unsigned long j = 0;
- for (i = 0; i < ucs2_strlen(src); i++) {
+ for (i = 0; src[i]; i++) {
u16 c = src[i];
if (c >= 0x800)
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 22f4cd9..afcc550 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -76,8 +76,6 @@
no longer necessary to write zeros when GFP_ZERO is used on
allocation.
- Enabling page poisoning with this option will disable hibernation
-
If unsure, say N
bool
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb..74c7cae 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@
void __dump_page(struct page *page, const char *reason)
{
+ int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
- page, page_ref_count(page), page_mapcount(page),
- page->mapping, page->index);
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page));
if (PageCompound(page))
pr_cont(" compound_mapcount: %d", compound_mapcount(page));
pr_cont("\n");
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..2d0986a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,6 +110,62 @@
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
+static int page_cache_tree_insert(struct address_space *mapping,
+ struct page *page, void **shadowp)
+{
+ struct radix_tree_node *node;
+ void **slot;
+ int error;
+
+ error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+ &node, &slot);
+ if (error)
+ return error;
+ if (*slot) {
+ void *p;
+
+ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ if (!radix_tree_exceptional_entry(p))
+ return -EEXIST;
+
+ mapping->nrexceptional--;
+ if (!dax_mapping(mapping)) {
+ if (shadowp)
+ *shadowp = p;
+ if (node)
+ workingset_node_shadows_dec(node);
+ } else {
+ /* DAX can replace empty locked entry with a hole */
+ WARN_ON_ONCE(p !=
+ (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
+ RADIX_DAX_ENTRY_LOCK));
+ /* DAX accounts exceptional entries as normal pages */
+ if (node)
+ workingset_node_pages_dec(node);
+ /* Wakeup waiters for exceptional entry lock */
+ dax_wake_mapping_entry_waiter(mapping, page->index,
+ false);
+ }
+ }
+ radix_tree_replace_slot(slot, page);
+ mapping->nrpages++;
+ if (node) {
+ workingset_node_pages_inc(node);
+ /*
+ * Don't track node that contains actual pages.
+ *
+ * Avoid acquiring the list_lru lock if already
+ * untracked. The list_empty() test is safe as
+ * node->private_list is protected by
+ * mapping->tree_lock.
+ */
+ if (!list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ }
+ return 0;
+}
+
static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
@@ -561,7 +617,7 @@
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(old, NULL);
- error = radix_tree_insert(&mapping->page_tree, offset, new);
+ error = page_cache_tree_insert(mapping, new, NULL);
BUG_ON(error);
mapping->nrpages++;
@@ -584,62 +640,6 @@
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int page_cache_tree_insert(struct address_space *mapping,
- struct page *page, void **shadowp)
-{
- struct radix_tree_node *node;
- void **slot;
- int error;
-
- error = __radix_tree_create(&mapping->page_tree, page->index, 0,
- &node, &slot);
- if (error)
- return error;
- if (*slot) {
- void *p;
-
- p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
- if (!radix_tree_exceptional_entry(p))
- return -EEXIST;
-
- mapping->nrexceptional--;
- if (!dax_mapping(mapping)) {
- if (shadowp)
- *shadowp = p;
- if (node)
- workingset_node_shadows_dec(node);
- } else {
- /* DAX can replace empty locked entry with a hole */
- WARN_ON_ONCE(p !=
- (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
- RADIX_DAX_ENTRY_LOCK));
- /* DAX accounts exceptional entries as normal pages */
- if (node)
- workingset_node_pages_dec(node);
- /* Wakeup waiters for exceptional entry lock */
- dax_wake_mapping_entry_waiter(mapping, page->index,
- false);
- }
- }
- radix_tree_replace_slot(slot, page);
- mapping->nrpages++;
- if (node) {
- workingset_node_pages_inc(node);
- /*
- * Don't track node that contains actual pages.
- *
- * Avoid acquiring the list_lru lock if already
- * untracked. The list_empty() test is safe as
- * node->private_list is protected by
- * mapping->tree_lock.
- */
- if (!list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- }
- return 0;
-}
-
static int __add_to_page_cache_locked(struct page *page,
struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a6abd76..283583f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1138,9 +1138,6 @@
bool was_writable;
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
if (unlikely(!pmd_same(pmd, *fe->pmd)))
goto out_unlock;
@@ -1168,7 +1165,7 @@
}
/* See similar comment in do_numa_page for explanation */
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pmd_write(pmd))
flags |= TNF_NO_GROUP;
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0..728d779 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@
* value (scan code).
*/
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+ struct vm_area_struct **vmap)
{
struct vm_area_struct *vma;
unsigned long hstart, hend;
@@ -846,7 +847,7 @@
if (unlikely(khugepaged_test_exit(mm)))
return SCAN_ANY_PROCESS;
- vma = find_vma(mm, address);
+ *vmap = vma = find_vma(mm, address);
if (!vma)
return SCAN_VMA_NULL;
@@ -881,6 +882,11 @@
.pmd = pmd,
};
+ /* we only decide to swapin, if there is enough young ptes */
+ if (referenced < HPAGE_PMD_NR/2) {
+ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+ return false;
+ }
fe.pte = pte_offset_map(pmd, address);
for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@
if (!is_swap_pte(pteval))
continue;
swapped_in++;
- /* we only decide to swapin, if there is enough young ptes */
- if (referenced < HPAGE_PMD_NR/2) {
- trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
- return false;
- }
ret = do_swap_page(&fe, pteval);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem);
- if (hugepage_vma_revalidate(mm, address)) {
+ if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
@@ -923,7 +924,6 @@
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
- struct vm_area_struct *vma,
int node, int referenced)
{
pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@
spinlock_t *pmd_ptl, *pte_ptl;
int isolated = 0, result = 0;
struct mem_cgroup *memcg;
+ struct vm_area_struct *vma;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
gfp_t gfp;
@@ -961,7 +962,7 @@
}
down_read(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result) {
mem_cgroup_cancel_charge(new_page, memcg, true);
up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@
* handled by the anon_vma lock + PG_lock.
*/
down_write(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
/* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@
if (ret) {
node = khugepaged_find_target_node();
/* collapse_huge_page will return with the mmap_sem released */
- collapse_huge_page(mm, address, hpage, vma, node, referenced);
+ collapse_huge_page(mm, address, hpage, node, referenced);
}
out:
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
diff --git a/mm/ksm.c b/mm/ksm.c
index 73d43ba..5048083 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -283,7 +283,8 @@
{
struct rmap_item *rmap_item;
- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+ __GFP_NORETRY | __GFP_NOWARN);
if (rmap_item)
ksm_rmap_items++;
return rmap_item;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a6a51a..4be518d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@
static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
+ unsigned long flags;
bool ret = false;
if (nr_pages > CHARGE_BATCH)
return ret;
- stock = &get_cpu_var(memcg_stock);
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
stock->nr_pages -= nr_pages;
ret = true;
}
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
+
return ret;
}
@@ -1771,15 +1776,18 @@
stock->cached = NULL;
}
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
static void drain_local_stock(struct work_struct *dummy)
{
- struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+ local_irq_restore(flags);
}
/*
@@ -1788,14 +1796,19 @@
*/
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
stock->cached = memcg;
}
stock->nr_pages += nr_pages;
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 83be99d..f1a6804 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3351,9 +3351,6 @@
bool was_writable = pte_write(pte);
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
/*
* The "pte" at this point cannot be used safely without
* validation through pte_unmap_same(). It's of NUMA type but
@@ -3398,7 +3395,7 @@
* pte_dirty has unpredictable behaviour between PTE scan updates,
* background writeback, dirty balancing and application behaviour.
*/
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pte_write(pte))
flags |= TNF_NO_GROUP;
/*
@@ -3458,6 +3455,11 @@
return VM_FAULT_FALLBACK;
}
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3524,7 +3526,7 @@
if (!pte_present(entry))
return do_swap_page(fe, entry);
- if (pte_protnone(entry))
+ if (pte_protnone(entry) && vma_is_accessible(fe->vma))
return do_numa_page(fe, entry);
fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
@@ -3590,7 +3592,7 @@
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
- if (pmd_protnone(orig_pmd))
+ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&fe, orig_pmd);
if ((fe.flags & FAULT_FLAG_WRITE) &&
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41266dc..9d29ba0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1555,8 +1555,8 @@
{
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
int nid = page_to_nid(page);
- nodemask_t nmask = node_online_map;
- struct page *new_page;
+ nodemask_t nmask = node_states[N_MEMORY];
+ struct page *new_page = NULL;
/*
* TODO: allocate a destination hugepage from a nearest neighbor node,
@@ -1568,11 +1568,13 @@
next_node_in(nid, nmask));
node_clear(nid, nmask);
+
if (PageHighMem(page)
|| (zone_idx(page_zone(page)) == ZONE_MOVABLE))
gfp_mask |= __GFP_HIGHMEM;
- new_page = __alloc_pages_nodemask(gfp_mask, 0,
+ if (!nodes_empty(nmask))
+ new_page = __alloc_pages_nodemask(gfp_mask, 0,
node_zonelist(nid, gfp_mask), &nmask);
if (!new_page)
new_page = __alloc_pages(gfp_mask, 0,
diff --git a/mm/mmap.c b/mm/mmap.c
index ca9d91b..7a0707a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,6 +88,11 @@
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
*
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE:
+ * r: (no) no
+ * w: (no) no
+ * x: (yes) yes
*/
pgprot_t protection_map[16] = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
@@ -3063,6 +3068,14 @@
return ERR_PTR(ret);
}
+bool vma_is_special_mapping(const struct vm_area_struct *vma,
+ const struct vm_special_mapping *sm)
+{
+ return vma->vm_private_data == sm &&
+ (vma->vm_ops == &special_mapping_vmops ||
+ vma->vm_ops == &legacy_special_mapping_vmops);
+}
+
/*
* Called with mm->mmap_sem held for writing.
* Insert a new vma covering the given region, with the given flags.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f4cd7d8..28d6f36 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2080,26 +2080,12 @@
ratelimit_pages = 16;
}
-static int
-ratelimit_handler(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int page_writeback_cpu_online(unsigned int cpu)
{
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DEAD:
- writeback_set_ratelimit();
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
+ writeback_set_ratelimit();
+ return 0;
}
-static struct notifier_block ratelimit_nb = {
- .notifier_call = ratelimit_handler,
- .next = NULL,
-};
-
/*
* Called early on to tune the page writeback dirty limits.
*
@@ -2122,8 +2108,10 @@
{
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
- writeback_set_ratelimit();
- register_cpu_notifier(&ratelimit_nb);
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
+ page_writeback_cpu_online, NULL);
+ cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
+ page_writeback_cpu_online);
}
/**
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fa..eafe5dd 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@
int ret;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
if (sis->flags & SWP_FILE) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@
if (sis->flags & SWP_FILE) {
struct address_space *mapping = sis->swap_file->f_mapping;
+ BUG_ON(!PageSwapCache(page));
return mapping->a_ops->set_page_dirty(page);
} else {
return __set_page_dirty_no_writeback(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index fd8b2b5..971fc83 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -270,7 +270,7 @@
info->alloced -= pages;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
-
+ shmem_unacct_blocks(info->flags, pages);
return false;
}
percpu_counter_add(&sbinfo->used_blocks, pages);
@@ -291,6 +291,7 @@
if (sbinfo->max_blocks)
percpu_counter_sub(&sbinfo->used_blocks, pages);
+ shmem_unacct_blocks(info->flags, pages);
}
/*
@@ -1980,7 +1981,7 @@
return addr;
sb = shm_mnt->mnt_sb;
}
- if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+ if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
return addr;
}
diff --git a/mm/slab.c b/mm/slab.c
index b672710..090fb26 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -886,6 +886,7 @@
return 0;
}
+#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
/*
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
@@ -908,6 +909,7 @@
return 0;
}
+#endif
static int setup_kmem_cache_node(struct kmem_cache *cachep,
int node, gfp_t gfp, bool force_change)
@@ -975,6 +977,8 @@
return ret;
}
+#ifdef CONFIG_SMP
+
static void cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
@@ -1075,65 +1079,54 @@
return -ENOMEM;
}
-static int cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+int slab_prepare_cpu(unsigned int cpu)
{
- long cpu = (long)hcpu;
- int err = 0;
+ int err;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&slab_mutex);
- err = cpuup_prepare(cpu);
- mutex_unlock(&slab_mutex);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- start_cpu_timer(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- /*
- * Shutdown cache reaper. Note that the slab_mutex is
- * held so that if cache_reap() is invoked it cannot do
- * anything expensive but will only modify reap_work
- * and reschedule the timer.
- */
- cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
- /* Now the cache_reaper is guaranteed to be not running. */
- per_cpu(slab_reap_work, cpu).work.func = NULL;
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between
- * cpu_down, and a kmalloc allocation from another cpu for
- * memory from the node of the cpu going down. The node
- * structure is usually allocated from kmem_cache_create() and
- * gets destroyed at kmem_cache_destroy().
- */
- /* fall through */
-#endif
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- mutex_lock(&slab_mutex);
- cpuup_canceled(cpu);
- mutex_unlock(&slab_mutex);
- break;
- }
- return notifier_from_errno(err);
+ mutex_lock(&slab_mutex);
+ err = cpuup_prepare(cpu);
+ mutex_unlock(&slab_mutex);
+ return err;
}
-static struct notifier_block cpucache_notifier = {
- &cpuup_callback, NULL, 0
-};
+/*
+ * This is called for a failed online attempt and for a successful
+ * offline.
+ *
+ * Even if all the cpus of a node are down, we don't free the
+ * kmem_list3 of any cache. This to avoid a race between cpu_down, and
+ * a kmalloc allocation from another cpu for memory from the node of
+ * the cpu going down. The list3 structure is usually allocated from
+ * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
+ */
+int slab_dead_cpu(unsigned int cpu)
+{
+ mutex_lock(&slab_mutex);
+ cpuup_canceled(cpu);
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
+#endif
+
+static int slab_online_cpu(unsigned int cpu)
+{
+ start_cpu_timer(cpu);
+ return 0;
+}
+
+static int slab_offline_cpu(unsigned int cpu)
+{
+ /*
+ * Shutdown cache reaper. Note that the slab_mutex is held so
+ * that if cache_reap() is invoked it cannot do anything
+ * expensive but will only modify reap_work and reschedule the
+ * timer.
+ */
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
+ /* Now the cache_reaper is guaranteed to be not running. */
+ per_cpu(slab_reap_work, cpu).work.func = NULL;
+ return 0;
+}
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
@@ -1336,12 +1329,6 @@
/* Done! */
slab_state = FULL;
- /*
- * Register a cpu startup notifier callback that initializes
- * cpu_cache_get for all new cpus
- */
- register_cpu_notifier(&cpucache_notifier);
-
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
@@ -1358,13 +1345,14 @@
static int __init cpucache_init(void)
{
- int cpu;
+ int ret;
/*
* Register the timers that return unneeded pages to the page allocator
*/
- for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
+ slab_online_cpu, slab_offline_cpu);
+ WARN_ON(ret < 0);
/* Done! */
slab_state = FULL;
diff --git a/mm/slub.c b/mm/slub.c
index 9adae58..2b3e740 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -194,10 +194,6 @@
#define __OBJECT_POISON 0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
-#ifdef CONFIG_SMP
-static struct notifier_block slab_notifier;
-#endif
-
/*
* Tracking user of a slab.
*/
@@ -2305,6 +2301,25 @@
}
/*
+ * Use the cpu notifier to insure that the cpu slabs are flushed when
+ * necessary.
+ */
+static int slub_cpu_dead(unsigned int cpu)
+{
+ struct kmem_cache *s;
+ unsigned long flags;
+
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(s, &slab_caches, list) {
+ local_irq_save(flags);
+ __flush_cpu_slab(s, cpu);
+ local_irq_restore(flags);
+ }
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
+
+/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
@@ -4144,9 +4159,8 @@
/* Setup random freelists for each cache */
init_freelist_randomization();
-#ifdef CONFIG_SMP
- register_cpu_notifier(&slab_notifier);
-#endif
+ cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+ slub_cpu_dead);
pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
cache_line_size(),
@@ -4210,43 +4224,6 @@
return err;
}
-#ifdef CONFIG_SMP
-/*
- * Use the cpu notifier to insure that the cpu slabs are flushed when
- * necessary.
- */
-static int slab_cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
- struct kmem_cache *s;
- unsigned long flags;
-
- switch (action) {
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- local_irq_save(flags);
- __flush_cpu_slab(s, cpu);
- local_irq_restore(flags);
- }
- mutex_unlock(&slab_mutex);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block slab_notifier = {
- .notifier_call = slab_cpuup_callback
-};
-
-#endif
-
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa29..2657acc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@
struct swap_info_struct *page_swap_info(struct page *page)
{
swp_entry_t swap = { .val = page_private(page) };
- BUG_ON(!PageSwapCache(page));
return swap_info[swp_type(swap)];
}
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 089328f..3c8da0a 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -207,8 +207,11 @@
* Some architectures (arm64) return true for virt_addr_valid() on
* vmalloced addresses. Work around this by checking for vmalloc
* first.
+ *
+ * We also need to check for module addresses explicitly since we
+ * may copy static data from modules to userspace
*/
- if (is_vmalloc_addr(ptr))
+ if (is_vmalloc_or_module_addr(ptr))
return NULL;
if (!virt_addr_valid(ptr))
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b1e12a1..0fe8b71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2303,23 +2303,6 @@
}
}
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
- /*
- * This deliberately does not clear the cpumask as it's expensive
- * and unnecessary. If there happens to be data in there then the
- * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
- * then will be cleared.
- */
- current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
/*
* This is a basic per-node page freer. Used by both kswapd and direct reclaim.
*/
@@ -2355,8 +2338,6 @@
scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
- init_tlb_ubc();
-
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
diff --git a/mm/workingset.c b/mm/workingset.c
index 69551cf..617475f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -418,21 +418,19 @@
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
-
- BUG_ON(!node->count);
- BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
+ BUG_ON(!workingset_node_shadows(node));
+ BUG_ON(workingset_node_pages(node));
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[i]) {
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
node->slots[i] = NULL;
- BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
- node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
+ workingset_node_shadows_dec(node);
BUG_ON(!mapping->nrexceptional);
mapping->nrexceptional--;
}
}
- BUG_ON(node->count);
+ BUG_ON(workingset_node_shadows(node));
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 7d170010..ee08540 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -335,7 +335,7 @@
goto out;
skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
- elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+ elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
elp_packet = (struct batadv_elp_packet *)elp_buff;
memset(elp_packet, 0, BATADV_ELP_HLEN);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7602c00..3d19947 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -470,6 +470,29 @@
}
/**
+ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * @orig_node: originator node whose last bonding candidate should be retrieved
+ *
+ * Return: last bonding candidate of router or NULL if not found
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+static struct batadv_orig_ifinfo *
+batadv_last_bonding_get(struct batadv_orig_node *orig_node)
+{
+ struct batadv_orig_ifinfo *last_bonding_candidate;
+
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ last_bonding_candidate = orig_node->last_bonding_candidate;
+
+ if (last_bonding_candidate)
+ kref_get(&last_bonding_candidate->refcount);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+ return last_bonding_candidate;
+}
+
+/**
* batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
* @orig_node: originator node whose bonding candidates should be replaced
* @new_candidate: new bonding candidate or NULL
@@ -539,7 +562,7 @@
* router - obviously there are no other candidates.
*/
rcu_read_lock();
- last_candidate = orig_node->last_bonding_candidate;
+ last_candidate = batadv_last_bonding_get(orig_node);
if (last_candidate)
last_cand_router = rcu_dereference(last_candidate->router);
@@ -631,6 +654,9 @@
batadv_orig_ifinfo_put(next_candidate);
}
+ if (last_candidate)
+ batadv_orig_ifinfo_put(last_candidate);
+
return router;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b..fd7b41e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,7 +1362,6 @@
if (!try_module_get(prot->owner))
goto out_free_sec;
sk_tx_queue_clear(sk);
- cgroup_sk_alloc(&sk->sk_cgrp_data);
}
return sk;
@@ -1422,6 +1421,7 @@
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
+ cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
}
@@ -1566,6 +1566,9 @@
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 4b351af..d6feabb 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -312,6 +312,7 @@
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
+ struct net_device *dev = skb->dev;
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -341,7 +342,7 @@
*/
if (!skb_valid_dst(skb)) {
int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev);
+ iph->tos, dev);
if (unlikely(err)) {
if (err == -EXDEV)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
@@ -370,7 +371,7 @@
__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
} else if (skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) {
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
/* RFC 1122 3.3.6:
*
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index cc701fa..5d7944f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -88,6 +88,7 @@
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
u32 orig_mark = skb->mark;
int ret;
@@ -105,7 +106,19 @@
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(tunnel->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2625332..5f006e1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2076,6 +2076,7 @@
struct rta_mfc_stats mfcs;
struct nlattr *mp_attr;
struct rtnexthop *nhp;
+ unsigned long lastuse;
int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2105,12 +2106,14 @@
nla_nest_end(skb, mp_attr);
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES,
- jiffies_to_clock_t(c->mfc_un.res.lastuse),
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
RTA_PAD))
return -EMSGSIZE;
@@ -2120,7 +2123,7 @@
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
- struct rtmsg *rtm, int nowait)
+ struct rtmsg *rtm, int nowait, u32 portid)
{
struct mfc_cache *cache;
struct mr_table *mrt;
@@ -2165,6 +2168,7 @@
return -ENOMEM;
}
+ NETLINK_CB(skb2).portid = portid;
skb_push(skb2, sizeof(struct iphdr));
skb_reset_network_header(skb2);
iph = ip_hdr(skb2);
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 2375b0a..30493be 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -31,6 +31,7 @@
__be32 saddr, daddr;
u_int8_t tos;
const struct iphdr *iph;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -46,15 +47,17 @@
tos = iph->tos;
ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_QUEUE) {
+ if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||
iph->daddr != daddr ||
skb->mark != mark ||
- iph->tos != tos)
- if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
- ret = NF_DROP;
+ iph->tos != tos) {
+ err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
return ret;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1f2830..62c3ed0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -476,12 +476,18 @@
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
u32 old = ACCESS_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
- u32 delta = 0;
+ u32 new, delta = 0;
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- return atomic_add_return(segs + delta, p_id) - segs;
+ /* Do not use atomic_add_return() as it makes UBSAN unhappy */
+ do {
+ old = (u32)atomic_read(p_id);
+ new = old + delta + segs;
+ } while (atomic_cmpxchg(p_id, old, new) != old);
+
+ return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
@@ -2497,7 +2503,8 @@
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
- r, nowait);
+ r, nowait, portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ebf45b..a756b87 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2329,10 +2329,9 @@
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
- &np->daddr, ntohs(inet->inet_dport),
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
tp->snd_cwnd, tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
@@ -5885,7 +5884,7 @@
* so release it.
*/
if (req) {
- tp->total_retrans = req->num_retrans;
+ inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
} else {
/* Make sure socket is routed, for correct metrics. */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdaef7f..d48d557 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1966,12 +1966,14 @@
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed)
+ if (nskb->ip_summed) {
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- else
- nskb->csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, nskb->csum);
+ } else {
+ __wsum csum = skb_copy_and_csum_bits(skb, 0,
+ skb_put(nskb, copy),
+ copy, 0);
+ nskb->csum = csum_block_add(nskb->csum, csum, len);
+ }
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2605,7 +2607,8 @@
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
- min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+ min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
+ sk->sk_sndbuf))
return -EAGAIN;
if (skb_still_in_host_queue(sk, skb))
@@ -2830,7 +2833,7 @@
if (tcp_retransmit_skb(sk, skb, segs))
return;
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
@@ -3567,6 +3570,8 @@
if (!res) {
__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ if (unlikely(tcp_passive_fastopen(sk)))
+ tcp_sk(sk)->total_retrans++;
}
return res;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d84930b..f712b41 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -384,6 +384,7 @@
*/
inet_rtx_syn_ack(sk, req);
req->num_timeout++;
+ icsk->icsk_retransmits++;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 704274c..edc3daa 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -648,7 +648,6 @@
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = skb->protocol;
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f..5bd3afd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -321,11 +321,9 @@
goto discard;
}
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
rcu_read_unlock();
- return xfrm6_rcv(skb);
+ return xfrm6_rcv_tnl(skb, t);
}
rcu_read_unlock();
return -EINVAL;
@@ -340,6 +338,7 @@
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
@@ -357,7 +356,19 @@
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6122f9c..7f4265b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2239,6 +2239,7 @@
struct rta_mfc_stats mfcs;
struct nlattr *mp_attr;
struct rtnexthop *nhp;
+ unsigned long lastuse;
int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2269,12 +2270,14 @@
nla_nest_end(skb, mp_attr);
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES,
- jiffies_to_clock_t(c->mfc_un.res.lastuse),
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
RTA_PAD))
return -EMSGSIZE;
@@ -2282,8 +2285,8 @@
return 1;
}
-int ip6mr_get_route(struct net *net,
- struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+ int nowait, u32 portid)
{
int err;
struct mr6_table *mrt;
@@ -2328,6 +2331,7 @@
return -ENOMEM;
}
+ NETLINK_CB(skb2).portid = portid;
skb_reset_transport_header(skb2);
skb_put(skb2, sizeof(struct ipv6hdr));
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 71d995f..2535223 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -31,6 +31,7 @@
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u32 mark, flowlabel;
+ int err;
/* malformed packet, drop it */
if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
@@ -46,13 +47,16 @@
flowlabel = *((u32 *)ipv6_hdr(skb));
ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_QUEUE &&
+ if (ret != NF_DROP && ret != NF_STOLEN &&
(memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
- return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+ err = ip6_route_me_harder(state->net, skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
return ret;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4981755..269218a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1986,9 +1986,18 @@
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
- if (cfg->fc_table)
+ if (cfg->fc_table) {
grt = ip6_nh_lookup_table(net, cfg, gw_addr);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
if (!grt)
grt = rt6_lookup(net, gw_addr, NULL,
cfg->fc_ifindex, 1);
@@ -3193,7 +3202,9 @@
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
- int err = ip6mr_get_route(net, skb, rtm, nowait);
+ int err = ip6mr_get_route(net, skb, rtm, nowait,
+ portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 00a2d40..b578956 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -21,9 +21,10 @@
return xfrm6_extract_header(skb);
}
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+ struct ip6_tnl *t)
{
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
return xfrm_input(skb, nexthdr, spi, 0);
@@ -49,13 +50,18 @@
return -1;
}
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
- 0);
+ 0, t);
+}
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
+
+int xfrm6_rcv(struct sk_buff *skb)
+{
+ return xfrm6_rcv_tnl(skb, NULL);
}
EXPORT_SYMBOL(xfrm6_rcv);
-
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5743044..e1c0bbe 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -236,7 +236,7 @@
__be32 spi;
spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 8d2f7c9..ccc2444 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -832,7 +832,7 @@
struct sock *sk = sock->sk;
struct irda_sock *new, *self = irda_sk(sk);
struct sock *newsk;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
int err;
err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
@@ -900,7 +900,6 @@
err = -EPERM; /* value does not seem to make sense. -arnd */
if (!new->tsap) {
pr_debug("%s(), dup failed!\n", __func__);
- kfree_skb(skb);
goto out;
}
@@ -919,7 +918,6 @@
/* Clean up the original one to keep it in listen state */
irttp_listen(self->tsap);
- kfree_skb(skb);
sk->sk_ack_backlog--;
newsock->state = SS_CONNECTED;
@@ -927,6 +925,7 @@
irda_connect_response(new);
err = 0;
out:
+ kfree_skb(skb);
release_sock(sk);
return err;
}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index a9aff60..afa9468 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -261,10 +261,16 @@
.timeout = timeout,
.ssn = start_seq_num,
};
-
int i, ret = -EOPNOTSUPP;
u16 status = WLAN_STATUS_REQUEST_DECLINED;
+ if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
+ ht_dbg(sta->sdata,
+ "STA %pM requests BA session on unsupported tid %d\n",
+ sta->sta.addr, tid);
+ goto end_no_lock;
+ }
+
if (!sta->sta.ht_cap.ht_supported) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 5650c46..45319cc 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -584,6 +584,9 @@
ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
return -EINVAL;
+ if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID))
+ return -EINVAL;
+
ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
pubsta->addr, tid);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 8f9c3bd..faccef9 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -746,6 +746,7 @@
sta = next_hop_deref_protected(mpath);
if (mpath->flags & MESH_PATH_ACTIVE &&
ether_addr_equal(ta, sta->sta.addr) &&
+ !(mpath->flags & MESH_PATH_FIXED) &&
(!(mpath->flags & MESH_PATH_SN_VALID) ||
SN_GT(target_sn, mpath->sn) || target_sn == 0)) {
mpath->flags &= ~MESH_PATH_ACTIVE;
@@ -1012,7 +1013,7 @@
goto enddiscovery;
spin_lock_bh(&mpath->state_lock);
- if (mpath->flags & MESH_PATH_DELETED) {
+ if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) {
spin_unlock_bh(&mpath->state_lock);
goto enddiscovery;
}
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 6db2ddf..f0e6175 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -826,7 +826,7 @@
mpath->metric = 0;
mpath->hop_count = 0;
mpath->exp_time = 0;
- mpath->flags |= MESH_PATH_FIXED;
+ mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
mesh_path_tx_pending(mpath);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 76b737d..aa58df8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1616,7 +1616,6 @@
sta_info_recalc_tim(sta);
} else {
- unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
int tid;
/*
@@ -1648,7 +1647,8 @@
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
- if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
+ if (!(driver_release_tids & BIT(tid)) ||
+ txqi->tin.backlog_packets)
continue;
sta_info_recalc_tim(sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5023966..18b285e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -796,6 +796,36 @@
return ret;
}
+static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_txq *txq = NULL;
+
+ if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
+ (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
+ return NULL;
+
+ if (!ieee80211_is_data(hdr->frame_control))
+ return NULL;
+
+ if (pubsta) {
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+
+ txq = pubsta->txq[tid];
+ } else if (vif) {
+ txq = vif->txq;
+ }
+
+ if (!txq)
+ return NULL;
+
+ return to_txq_info(txq);
+}
+
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
@@ -853,7 +883,8 @@
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
tx->sta->tx_stats.msdu[tid]++;
- if (!tx->sta->sta.txq[0])
+ if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta,
+ tx->skb))
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
return TX_CONTINUE;
@@ -1243,36 +1274,6 @@
return TX_CONTINUE;
}
-static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
- struct sk_buff *skb)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_txq *txq = NULL;
-
- if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
- (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
- return NULL;
-
- if (!ieee80211_is_data(hdr->frame_control))
- return NULL;
-
- if (pubsta) {
- u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-
- txq = pubsta->txq[tid];
- } else if (vif) {
- txq = vif->txq;
- }
-
- if (!txq)
- return NULL;
-
- return to_txq_info(txq);
-}
-
static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
{
IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
@@ -1514,8 +1515,12 @@
spin_unlock_bh(&fq->lock);
if (skb && skb_has_frag_list(skb) &&
- !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
- skb_linearize(skb);
+ !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
+ if (skb_linearize(skb)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return NULL;
+ }
+ }
return skb;
}
@@ -3264,7 +3269,7 @@
if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
*ieee80211_get_qos_ctl(hdr) = tid;
- if (!sta->sta.txq[0])
+ if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb))
hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
} else {
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index dd2c43ab..9934b0c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1035,9 +1035,9 @@
if (IS_ERR(ct))
return (struct nf_conntrack_tuple_hash *)ct;
- if (tmpl && nfct_synproxy(tmpl)) {
- nfct_seqadj_ext_add(ct);
- nfct_synproxy_ext_add(ct);
+ if (!nf_ct_add_synproxy(ct, tmpl)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818..ecee105 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -441,7 +441,8 @@
ct->status |= IPS_DST_NAT;
if (nfct_help(ct))
- nfct_seqadj_ext_add(ct);
+ if (!nfct_seqadj_ext_add(ct))
+ return NF_DROP;
}
if (maniptype == NF_NAT_MANIP_SRC) {
@@ -807,7 +808,7 @@
if (err < 0)
return err;
- return nf_nat_setup_info(ct, &range, manip);
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
}
#else
static int
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 39eb1cc..fa24a5b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -237,7 +237,7 @@
break;
case NFT_TRACETYPE_POLICY:
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
- info->basechain->policy))
+ htonl(info->basechain->policy)))
goto nla_put_failure;
break;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index e87cd81..4a60cd5 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -53,7 +53,7 @@
u32 *tlv = (u32 *)(skbdata);
u16 totlen = nla_total_size(dlen); /*alignment + hdr */
char *dptr = (char *)tlv + NLA_HDRLEN;
- u32 htlv = attrtype << 16 | dlen;
+ u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
*tlv = htonl(htlv);
memset(dptr, 0, totlen - NLA_HDRLEN);
@@ -627,7 +627,7 @@
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
- u16 ifehdrln = ifehdr->metalen;
+ int ifehdrln = (int)ifehdr->metalen;
struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
spin_lock(&ife->tcf_lock);
@@ -740,8 +740,6 @@
return TC_ACT_SHOT;
}
- iethh = eth_hdr(skb);
-
err = skb_cow_head(skb, hdrm);
if (unlikely(err)) {
ife->tcf_qstats.drops++;
@@ -752,6 +750,7 @@
if (!(at & AT_EGRESS))
skb_push(skb, skb->dev->hard_header_len);
+ iethh = (struct ethhdr *)skb->data;
__skb_push(skb, hdrm);
memcpy(skb->data, iethh, skb->mac_len);
skb_reset_mac_header(skb);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f27ffee..ca0516e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1153,6 +1153,7 @@
if (!skb)
return NULL;
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
qdisc_bstats_update(sch, skb);
@@ -1256,6 +1257,7 @@
}
bstats_update(&cl->bstats, skb);
+ qdisc_qstats_backlog_inc(sch, skb);
++sch->q.qlen;
agg = cl->agg;
@@ -1476,6 +1478,7 @@
qdisc_reset(cl->qdisc);
}
}
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index add3cc7..20a350b 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -400,6 +400,7 @@
enqueue:
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
increment_qlen(skb, q);
} else if (net_xmit_drop_count(ret)) {
@@ -428,6 +429,7 @@
if (skb) {
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
decrement_qlen(skb, q);
}
@@ -450,6 +452,7 @@
struct sfb_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
q->slot = 0;
q->double_buffering = false;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a55e547..0a3dbec 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -179,6 +179,11 @@
msg, msg->expires_at, jiffies);
}
+ if (asoc->peer.prsctp_capable &&
+ SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+ msg->expires_at =
+ jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
@@ -335,7 +340,7 @@
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- if (!chunk->asoc->prsctp_enable ||
+ if (!chunk->asoc->peer.prsctp_capable ||
!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
struct sctp_datamsg *msg = chunk->msg;
@@ -349,14 +354,14 @@
}
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
- time_after(jiffies, chunk->prsctp_param)) {
+ time_after(jiffies, chunk->msg->expires_at)) {
if (chunk->sent_count)
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
else
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
- chunk->sent_count > chunk->prsctp_param) {
+ chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 69444d3..1555fb8 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -796,27 +796,34 @@
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
+ struct sctp_transport *t = (struct sctp_transport *)ptr;
const struct sctp_hash_cmp_arg *x = arg->key;
- const struct sctp_transport *t = ptr;
- struct sctp_association *asoc = t->asoc;
- const struct net *net = x->net;
+ struct sctp_association *asoc;
+ int err = 1;
if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
- return 1;
- if (!net_eq(sock_net(asoc->base.sk), net))
- return 1;
+ return err;
+ if (!sctp_transport_hold(t))
+ return err;
+
+ asoc = t->asoc;
+ if (!net_eq(sock_net(asoc->base.sk), x->net))
+ goto out;
if (x->ep) {
if (x->ep != asoc->ep)
- return 1;
+ goto out;
} else {
if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
- return 1;
+ goto out;
if (!sctp_bind_addr_match(&asoc->base.bind_addr,
x->laddr, sctp_sk(asoc->base.sk)))
- return 1;
+ goto out;
}
- return 0;
+ err = 0;
+out:
+ sctp_transport_put(t);
+ return err;
}
static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 72e54a4..107233d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -326,7 +326,7 @@
sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
- if (chunk->asoc->prsctp_enable &&
+ if (chunk->asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
chunk->asoc->sent_cnt_removable++;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
@@ -383,7 +383,7 @@
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->transmitted_list);
@@ -418,7 +418,7 @@
list_for_each_entry_safe(chk, temp, queue, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->list);
@@ -442,7 +442,7 @@
{
struct sctp_transport *transport;
- if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable)
return;
msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
@@ -1055,7 +1055,7 @@
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
@@ -1347,7 +1347,7 @@
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f3508aa..cef0cee 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -272,28 +272,17 @@
return err;
}
-static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sock *sk, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
+ struct sctp_association *assoc;
int err = 0;
- /* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
- goto out;
-
- if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
- goto out;
-
lock_sock(sk);
- if (sk != assoc->base.sk)
- goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -312,7 +301,7 @@
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->nlh) < 0) {
cb->args[3] = 1;
- err = 2;
+ err = 1;
goto release;
}
cb->args[3] = 1;
@@ -321,7 +310,7 @@
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
- err = 2;
+ err = 1;
goto release;
}
next:
@@ -333,10 +322,35 @@
cb->args[4] = 0;
release:
release_sock(sk);
+ sock_put(sk);
return err;
+}
+
+static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+{
+ struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct netlink_callback *cb = commp->cb;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct sctp_association *assoc =
+ list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+ /* find the ep only once through the transports by this condition */
+ if (tsp->asoc != assoc)
+ goto out;
+
+ if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+ goto out;
+
+ sock_hold(sk);
+ cb->args[5] = (long)sk;
+
+ return 1;
+
out:
cb->args[2]++;
- return err;
+ return 0;
}
static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -472,10 +486,18 @@
* 2 : to record the transport pos of this time's traversal
* 3 : to mark if we have dumped the ep info of the current asoc
* 4 : to work as a temporary variable to traversal list
+ * 5 : to save the sk we get from travelsing the tsp list.
*/
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
+
+next:
+ cb->args[5] = 0;
+ sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
+
+ if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
+ goto next;
+
done:
cb->args[1] = cb->args[4];
cb->args[4] = 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8c77b87..46ffecc 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -706,20 +706,6 @@
return retval;
}
-static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
- const struct sctp_sndrcvinfo *sinfo)
-{
- if (!chunk->asoc->prsctp_enable)
- return;
-
- if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param =
- jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
- else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
- SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param = sinfo->sinfo_timetolive;
-}
-
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
@@ -753,7 +739,6 @@
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
- sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9fc417a..8ed2d99 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4469,17 +4469,21 @@
const union sctp_addr *paddr, void *p)
{
struct sctp_transport *transport;
- int err = 0;
+ int err = -ENOENT;
rcu_read_lock();
transport = sctp_addrs_lookup_transport(net, laddr, paddr);
if (!transport || !sctp_transport_hold(transport))
goto out;
- err = cb(transport, p);
+
+ sctp_association_hold(transport->asoc);
sctp_transport_put(transport);
-out:
rcu_read_unlock();
+ err = cb(transport, p);
+ sctp_association_put(transport->asoc);
+
+out:
return err;
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 17dbbe6..8a398b3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -465,6 +465,8 @@
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
+
+ listener->sk_ack_backlog--;
} else if (!vsk->rejected) {
/* We are not on the pending list and accept() did not reject
* us, so we must have been accepted by our user process. We
@@ -475,8 +477,6 @@
goto out;
}
- listener->sk_ack_backlog--;
-
/* We need to remove ourself from the global connected sockets list so
* incoming packets can't find this socket, and to reduce the reference
* count.
@@ -2010,5 +2010,5 @@
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.1.0-k");
+MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f02653a..4809f4d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6978,7 +6978,7 @@
params.n_counter_offsets_presp = len / sizeof(u16);
if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_beacon >
+ (params.n_counter_offsets_presp >
rdev->wiphy.max_num_csa_counters))
return -EINVAL;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c..a30f898d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -332,6 +332,7 @@
{
tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
+ kfree(x->aead);
kfree(x->aalg);
kfree(x->ealg);
kfree(x->calg);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index cb65d91..0889209 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -581,9 +581,12 @@
if (err)
goto error;
- if (attrs[XFRMA_SEC_CTX] &&
- security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
- goto error;
+ if (attrs[XFRMA_SEC_CTX]) {
+ err = security_xfrm_state_alloc(x,
+ nla_data(attrs[XFRMA_SEC_CTX]));
+ if (err)
+ goto error;
+ }
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py
index df643f6..a32e4da 100755
--- a/scripts/checkkconfigsymbols.py
+++ b/scripts/checkkconfigsymbols.py
@@ -1,98 +1,99 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
"""Find Kconfig symbols that are referenced but not defined."""
-# (c) 2014-2015 Valentin Rothberg <valentinrothberg@gmail.com>
+# (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#
# Licensed under the terms of the GNU GPL License version 2
+import argparse
import difflib
import os
import re
import signal
+import subprocess
import sys
from multiprocessing import Pool, cpu_count
-from optparse import OptionParser
-from subprocess import Popen, PIPE, STDOUT
# regex expressions
OPERATORS = r"&|\(|\)|\||\!"
-FEATURE = r"(?:\w*[A-Z0-9]\w*){2,}"
-DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*"
-EXPR = r"(?:" + OPERATORS + r"|\s|" + FEATURE + r")+"
+SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
+DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
+EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
-SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")"
+SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"
# regex objects
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
-REGEX_FEATURE = re.compile(r'(?!\B)' + FEATURE + r'(?!\B)')
-REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE)
+REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
+REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
-REGEX_FILTER_FEATURES = re.compile(r"[A-Za-z0-9]$")
+REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
REGEX_QUOTES = re.compile("(\"(.*?)\")")
def parse_options():
"""The user interface of this module."""
- usage = "%prog [options]\n\n" \
- "Run this tool to detect Kconfig symbols that are referenced but " \
- "not defined in\nKconfig. The output of this tool has the " \
- "format \'Undefined symbol\\tFile list\'\n\n" \
- "If no option is specified, %prog will default to check your\n" \
- "current tree. Please note that specifying commits will " \
- "\'git reset --hard\'\nyour current tree! You may save " \
- "uncommitted changes to avoid losing data."
+ usage = "Run this tool to detect Kconfig symbols that are referenced but " \
+ "not defined in Kconfig. If no option is specified, " \
+ "checkkconfigsymbols defaults to check your current tree. " \
+ "Please note that specifying commits will 'git reset --hard\' " \
+ "your current tree! You may save uncommitted changes to avoid " \
+ "losing data."
- parser = OptionParser(usage=usage)
+ parser = argparse.ArgumentParser(description=usage)
- parser.add_option('-c', '--commit', dest='commit', action='store',
- default="",
- help="Check if the specified commit (hash) introduces "
- "undefined Kconfig symbols.")
+ parser.add_argument('-c', '--commit', dest='commit', action='store',
+ default="",
+ help="check if the specified commit (hash) introduces "
+ "undefined Kconfig symbols")
- parser.add_option('-d', '--diff', dest='diff', action='store',
- default="",
- help="Diff undefined symbols between two commits. The "
- "input format bases on Git log's "
- "\'commmit1..commit2\'.")
+ parser.add_argument('-d', '--diff', dest='diff', action='store',
+ default="",
+ help="diff undefined symbols between two commits "
+ "(e.g., -d commmit1..commit2)")
- parser.add_option('-f', '--find', dest='find', action='store_true',
- default=False,
- help="Find and show commits that may cause symbols to be "
- "missing. Required to run with --diff.")
+ parser.add_argument('-f', '--find', dest='find', action='store_true',
+ default=False,
+ help="find and show commits that may cause symbols to be "
+ "missing (required to run with --diff)")
- parser.add_option('-i', '--ignore', dest='ignore', action='store',
- default="",
- help="Ignore files matching this pattern. Note that "
- "the pattern needs to be a Python regex. To "
- "ignore defconfigs, specify -i '.*defconfig'.")
+ parser.add_argument('-i', '--ignore', dest='ignore', action='store',
+ default="",
+ help="ignore files matching this Python regex "
+ "(e.g., -i '.*defconfig')")
- parser.add_option('-s', '--sim', dest='sim', action='store', default="",
- help="Print a list of maximum 10 string-similar symbols.")
+ parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
+ help="print a list of max. 10 string-similar symbols")
- parser.add_option('', '--force', dest='force', action='store_true',
- default=False,
- help="Reset current Git tree even when it's dirty.")
+ parser.add_argument('--force', dest='force', action='store_true',
+ default=False,
+ help="reset current Git tree even when it's dirty")
- (opts, _) = parser.parse_args()
+ parser.add_argument('--no-color', dest='color', action='store_false',
+ default=True,
+ help="don't print colored output (default when not "
+ "outputting to a terminal)")
- if opts.commit and opts.diff:
+ args = parser.parse_args()
+
+ if args.commit and args.diff:
sys.exit("Please specify only one option at once.")
- if opts.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", opts.diff):
+ if args.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", args.diff):
sys.exit("Please specify valid input in the following format: "
"\'commit1..commit2\'")
- if opts.commit or opts.diff:
- if not opts.force and tree_is_dirty():
+ if args.commit or args.diff:
+ if not args.force and tree_is_dirty():
sys.exit("The current Git tree is dirty (see 'git status'). "
"Running this script may\ndelete important data since it "
"calls 'git reset --hard' for some performance\nreasons. "
@@ -100,138 +101,148 @@
"'--force' if you\nwant to ignore this warning and "
"continue.")
- if opts.commit:
- opts.find = False
+ if args.commit:
+ args.find = False
- if opts.ignore:
+ if args.ignore:
try:
- re.match(opts.ignore, "this/is/just/a/test.c")
+ re.match(args.ignore, "this/is/just/a/test.c")
except:
sys.exit("Please specify a valid Python regex.")
- return opts
+ return args
def main():
"""Main function of this module."""
- opts = parse_options()
+ args = parse_options()
- if opts.sim and not opts.commit and not opts.diff:
- sims = find_sims(opts.sim, opts.ignore)
+ global COLOR
+ COLOR = args.color and sys.stdout.isatty()
+
+ if args.sim and not args.commit and not args.diff:
+ sims = find_sims(args.sim, args.ignore)
if sims:
- print "%s: %s" % (yel("Similar symbols"), ', '.join(sims))
+ print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
else:
- print "%s: no similar symbols found" % yel("Similar symbols")
+ print("%s: no similar symbols found" % yel("Similar symbols"))
sys.exit(0)
# dictionary of (un)defined symbols
defined = {}
undefined = {}
- if opts.commit or opts.diff:
+ if args.commit or args.diff:
head = get_head()
# get commit range
commit_a = None
commit_b = None
- if opts.commit:
- commit_a = opts.commit + "~"
- commit_b = opts.commit
- elif opts.diff:
- split = opts.diff.split("..")
+ if args.commit:
+ commit_a = args.commit + "~"
+ commit_b = args.commit
+ elif args.diff:
+ split = args.diff.split("..")
commit_a = split[0]
commit_b = split[1]
undefined_a = {}
undefined_b = {}
# get undefined items before the commit
- execute("git reset --hard %s" % commit_a)
- undefined_a, _ = check_symbols(opts.ignore)
+ reset(commit_a)
+ undefined_a, _ = check_symbols(args.ignore)
# get undefined items for the commit
- execute("git reset --hard %s" % commit_b)
- undefined_b, defined = check_symbols(opts.ignore)
+ reset(commit_b)
+ undefined_b, defined = check_symbols(args.ignore)
# report cases that are present for the commit but not before
- for feature in sorted(undefined_b):
- # feature has not been undefined before
- if not feature in undefined_a:
- files = sorted(undefined_b.get(feature))
- undefined[feature] = files
- # check if there are new files that reference the undefined feature
+ for symbol in sorted(undefined_b):
+ # symbol has not been undefined before
+ if symbol not in undefined_a:
+ files = sorted(undefined_b.get(symbol))
+ undefined[symbol] = files
+ # check if there are new files that reference the undefined symbol
else:
- files = sorted(undefined_b.get(feature) -
- undefined_a.get(feature))
+ files = sorted(undefined_b.get(symbol) -
+ undefined_a.get(symbol))
if files:
- undefined[feature] = files
+ undefined[symbol] = files
# reset to head
- execute("git reset --hard %s" % head)
+ reset(head)
# default to check the entire tree
else:
- undefined, defined = check_symbols(opts.ignore)
+ undefined, defined = check_symbols(args.ignore)
# now print the output
- for feature in sorted(undefined):
- print red(feature)
+ for symbol in sorted(undefined):
+ print(red(symbol))
- files = sorted(undefined.get(feature))
- print "%s: %s" % (yel("Referencing files"), ", ".join(files))
+ files = sorted(undefined.get(symbol))
+ print("%s: %s" % (yel("Referencing files"), ", ".join(files)))
- sims = find_sims(feature, opts.ignore, defined)
+ sims = find_sims(symbol, args.ignore, defined)
sims_out = yel("Similar symbols")
if sims:
- print "%s: %s" % (sims_out, ', '.join(sims))
+ print("%s: %s" % (sims_out, ', '.join(sims)))
else:
- print "%s: %s" % (sims_out, "no similar symbols found")
+ print("%s: %s" % (sims_out, "no similar symbols found"))
- if opts.find:
- print "%s:" % yel("Commits changing symbol")
- commits = find_commits(feature, opts.diff)
+ if args.find:
+ print("%s:" % yel("Commits changing symbol"))
+ commits = find_commits(symbol, args.diff)
if commits:
for commit in commits:
commit = commit.split(" ", 1)
- print "\t- %s (\"%s\")" % (yel(commit[0]), commit[1])
+ print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
else:
- print "\t- no commit found"
- print # new line
+ print("\t- no commit found")
+ print() # new line
+
+
+def reset(commit):
+ """Reset current git tree to %commit."""
+ execute(["git", "reset", "--hard", commit])
def yel(string):
"""
Color %string yellow.
"""
- return "\033[33m%s\033[0m" % string
+ return "\033[33m%s\033[0m" % string if COLOR else string
def red(string):
"""
Color %string red.
"""
- return "\033[31m%s\033[0m" % string
+ return "\033[31m%s\033[0m" % string if COLOR else string
def execute(cmd):
"""Execute %cmd and return stdout. Exit in case of error."""
- pop = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
- (stdout, _) = pop.communicate() # wait until finished
- if pop.returncode != 0:
- sys.exit(stdout)
+ try:
+ stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ stdout = stdout.decode(errors='replace')
+ except subprocess.CalledProcessError as fail:
+ exit(fail)
return stdout
def find_commits(symbol, diff):
"""Find commits changing %symbol in the given range of %diff."""
- commits = execute("git log --pretty=oneline --abbrev-commit -G %s %s"
- % (symbol, diff))
+ commits = execute(["git", "log", "--pretty=oneline",
+ "--abbrev-commit", "-G",
+ symbol, diff])
return [x for x in commits.split("\n") if x]
def tree_is_dirty():
"""Return true if the current working tree is dirty (i.e., if any file has
been added, deleted, modified, renamed or copied but not committed)."""
- stdout = execute("git status --porcelain")
+ stdout = execute(["git", "status", "--porcelain"])
for line in stdout:
if re.findall(r"[URMADC]{1}", line[:2]):
return True
@@ -240,13 +251,13 @@
def get_head():
"""Return commit hash of current HEAD."""
- stdout = execute("git rev-parse HEAD")
+ stdout = execute(["git", "rev-parse", "HEAD"])
return stdout.strip('\n')
def partition(lst, size):
"""Partition list @lst into eveni-sized lists of size @size."""
- return [lst[i::size] for i in xrange(size)]
+ return [lst[i::size] for i in range(size)]
def init_worker():
@@ -254,7 +265,7 @@
signal.signal(signal.SIGINT, signal.SIG_IGN)
-def find_sims(symbol, ignore, defined = []):
+def find_sims(symbol, ignore, defined=[]):
"""Return a list of max. ten Kconfig symbols that are string-similar to
@symbol."""
if defined:
@@ -279,7 +290,7 @@
def get_files():
"""Return a list of all files in the current git directory."""
# use 'git ls-files' to get the worklist
- stdout = execute("git ls-files")
+ stdout = execute(["git", "ls-files"])
if len(stdout) > 0 and stdout[-1] == "\n":
stdout = stdout[:-1]
@@ -311,8 +322,8 @@
check_symbols() in order to properly terminate running worker processes."""
source_files = []
kconfig_files = []
- defined_features = []
- referenced_features = dict() # {file: [features]}
+ defined_symbols = []
+ referenced_symbols = dict() # {file: [symbols]}
for gitfile in get_files():
if REGEX_FILE_KCONFIG.match(gitfile):
@@ -326,76 +337,75 @@
# parse source files
arglist = partition(source_files, cpu_count())
for res in pool.map(parse_source_files, arglist):
- referenced_features.update(res)
-
+ referenced_symbols.update(res)
# parse kconfig files
arglist = []
for part in partition(kconfig_files, cpu_count()):
arglist.append((part, ignore))
for res in pool.map(parse_kconfig_files, arglist):
- defined_features.extend(res[0])
- referenced_features.update(res[1])
- defined_features = set(defined_features)
+ defined_symbols.extend(res[0])
+ referenced_symbols.update(res[1])
+ defined_symbols = set(defined_symbols)
- # inverse mapping of referenced_features to dict(feature: [files])
+ # inverse mapping of referenced_symbols to dict(symbol: [files])
inv_map = dict()
- for _file, features in referenced_features.iteritems():
- for feature in features:
- inv_map[feature] = inv_map.get(feature, set())
- inv_map[feature].add(_file)
- referenced_features = inv_map
+ for _file, symbols in referenced_symbols.items():
+ for symbol in symbols:
+ inv_map[symbol] = inv_map.get(symbol, set())
+ inv_map[symbol].add(_file)
+ referenced_symbols = inv_map
- undefined = {} # {feature: [files]}
- for feature in sorted(referenced_features):
+ undefined = {} # {symbol: [files]}
+ for symbol in sorted(referenced_symbols):
# filter some false positives
- if feature == "FOO" or feature == "BAR" or \
- feature == "FOO_BAR" or feature == "XXX":
+ if symbol == "FOO" or symbol == "BAR" or \
+ symbol == "FOO_BAR" or symbol == "XXX":
continue
- if feature not in defined_features:
- if feature.endswith("_MODULE"):
+ if symbol not in defined_symbols:
+ if symbol.endswith("_MODULE"):
# avoid false positives for kernel modules
- if feature[:-len("_MODULE")] in defined_features:
+ if symbol[:-len("_MODULE")] in defined_symbols:
continue
- undefined[feature] = referenced_features.get(feature)
- return undefined, defined_features
+ undefined[symbol] = referenced_symbols.get(symbol)
+ return undefined, defined_symbols
def parse_source_files(source_files):
"""Parse each source file in @source_files and return dictionary with source
files as keys and lists of references Kconfig symbols as values."""
- referenced_features = dict()
+ referenced_symbols = dict()
for sfile in source_files:
- referenced_features[sfile] = parse_source_file(sfile)
- return referenced_features
+ referenced_symbols[sfile] = parse_source_file(sfile)
+ return referenced_symbols
def parse_source_file(sfile):
- """Parse @sfile and return a list of referenced Kconfig features."""
+ """Parse @sfile and return a list of referenced Kconfig symbols."""
lines = []
references = []
if not os.path.exists(sfile):
return references
- with open(sfile, "r") as stream:
+ with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
lines = stream.readlines()
for line in lines:
- if not "CONFIG_" in line:
+ if "CONFIG_" not in line:
continue
- features = REGEX_SOURCE_FEATURE.findall(line)
- for feature in features:
- if not REGEX_FILTER_FEATURES.search(feature):
+ symbols = REGEX_SOURCE_SYMBOL.findall(line)
+ for symbol in symbols:
+ if not REGEX_FILTER_SYMBOLS.search(symbol):
continue
- references.append(feature)
+ references.append(symbol)
return references
-def get_features_in_line(line):
- """Return mentioned Kconfig features in @line."""
- return REGEX_FEATURE.findall(line)
+def get_symbols_in_line(line):
+ """Return mentioned Kconfig symbols in @line."""
+ return REGEX_SYMBOL.findall(line)
def parse_kconfig_files(args):
@@ -404,21 +414,21 @@
pattern."""
kconfig_files = args[0]
ignore = args[1]
- defined_features = []
- referenced_features = dict()
+ defined_symbols = []
+ referenced_symbols = dict()
for kfile in kconfig_files:
defined, references = parse_kconfig_file(kfile)
- defined_features.extend(defined)
+ defined_symbols.extend(defined)
if ignore and re.match(ignore, kfile):
# do not collect references for files that match the ignore pattern
continue
- referenced_features[kfile] = references
- return (defined_features, referenced_features)
+ referenced_symbols[kfile] = references
+ return (defined_symbols, referenced_symbols)
def parse_kconfig_file(kfile):
- """Parse @kfile and update feature definitions and references."""
+ """Parse @kfile and update symbol definitions and references."""
lines = []
defined = []
references = []
@@ -427,7 +437,7 @@
if not os.path.exists(kfile):
return defined, references
- with open(kfile, "r") as stream:
+ with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
lines = stream.readlines()
for i in range(len(lines)):
@@ -436,8 +446,8 @@
line = line.split("#")[0] # ignore comments
if REGEX_KCONFIG_DEF.match(line):
- feature_def = REGEX_KCONFIG_DEF.findall(line)
- defined.append(feature_def[0])
+ symbol_def = REGEX_KCONFIG_DEF.findall(line)
+ defined.append(symbol_def[0])
skip = False
elif REGEX_KCONFIG_HELP.match(line):
skip = True
@@ -446,18 +456,18 @@
pass
elif REGEX_KCONFIG_STMT.match(line):
line = REGEX_QUOTES.sub("", line)
- features = get_features_in_line(line)
+ symbols = get_symbols_in_line(line)
# multi-line statements
while line.endswith("\\"):
i += 1
line = lines[i]
line = line.strip('\n')
- features.extend(get_features_in_line(line))
- for feature in set(features):
- if REGEX_NUMERIC.match(feature):
+ symbols.extend(get_symbols_in_line(line))
+ for symbol in set(symbols):
+ if REGEX_NUMERIC.match(symbol):
# ignore numeric values
continue
- references.append(feature)
+ references.append(symbol)
return defined, references
diff --git a/scripts/faddr2line b/scripts/faddr2line
new file mode 100755
index 0000000..450b332
--- /dev/null
+++ b/scripts/faddr2line
@@ -0,0 +1,177 @@
+#!/bin/bash
+#
+# Translate stack dump function offsets.
+#
+# addr2line doesn't work with KASLR addresses. This works similarly to
+# addr2line, but instead takes the 'func+0x123' format as input:
+#
+# $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568
+# meminfo_proc_show+0x5/0x568:
+# meminfo_proc_show at fs/proc/meminfo.c:27
+#
+# If the address is part of an inlined function, the full inline call chain is
+# printed:
+#
+# $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27
+# native_write_msr+0x6/0x27:
+# arch_static_branch at arch/x86/include/asm/msr.h:121
+# (inlined by) static_key_false at include/linux/jump_label.h:125
+# (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125
+#
+# The function size after the '/' in the input is optional, but recommended.
+# It's used to help disambiguate any duplicate symbol names, which can occur
+# rarely. If the size is omitted for a duplicate symbol then it's possible for
+# multiple code sites to be printed:
+#
+# $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5
+# raw_ioctl+0x5/0x20:
+# raw_ioctl at drivers/char/raw.c:122
+#
+# raw_ioctl+0x5/0xb1:
+# raw_ioctl at net/ipv4/raw.c:876
+#
+# Multiple addresses can be specified on a single command line:
+#
+# $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90
+# type_show+0x10/0x2d:
+# type_show at drivers/video/backlight/backlight.c:213
+#
+# free_reserved_area+0x90/0x123:
+# free_reserved_area at mm/page_alloc.c:6429 (discriminator 2)
+
+
+set -o errexit
+set -o nounset
+
+command -v awk >/dev/null 2>&1 || die "awk isn't installed"
+command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
+command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
+
+usage() {
+ echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
+ exit 1
+}
+
+warn() {
+ echo "$1" >&2
+}
+
+die() {
+ echo "ERROR: $1" >&2
+ exit 1
+}
+
+# Try to figure out the source directory prefix so we can remove it from the
+# addr2line output. HACK ALERT: This assumes that start_kernel() is in
+# kernel/init.c! This only works for vmlinux. Otherwise it falls back to
+# printing the absolute path.
+find_dir_prefix() {
+ local objfile=$1
+
+ local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+ [[ -z $start_kernel_addr ]] && return
+
+ local file_line=$(addr2line -e $objfile $start_kernel_addr)
+ [[ -z $file_line ]] && return
+
+ local prefix=${file_line%init/main.c:*}
+ if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then
+ return
+ fi
+
+ DIR_PREFIX=$prefix
+ return 0
+}
+
+__faddr2line() {
+ local objfile=$1
+ local func_addr=$2
+ local dir_prefix=$3
+ local print_warnings=$4
+
+ local func=${func_addr%+*}
+ local offset=${func_addr#*+}
+ offset=${offset%/*}
+ local size=
+ [[ $func_addr =~ "/" ]] && size=${func_addr#*/}
+
+ if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then
+ warn "bad func+offset $func_addr"
+ DONE=1
+ return
+ fi
+
+ # Go through each of the object's symbols which match the func name.
+ # In rare cases there might be duplicates.
+ while read symbol; do
+ local fields=($symbol)
+ local sym_base=0x${fields[1]}
+ local sym_size=${fields[2]}
+ local sym_type=${fields[3]}
+
+ # calculate the address
+ local addr=$(($sym_base + $offset))
+ if [[ -z $addr ]] || [[ $addr = 0 ]]; then
+ warn "bad address: $sym_base + $offset"
+ DONE=1
+ return
+ fi
+ local hexaddr=0x$(printf %x $addr)
+
+ # weed out non-function symbols
+ if [[ $sym_type != "FUNC" ]]; then
+ [[ $print_warnings = 1 ]] &&
+ echo "skipping $func address at $hexaddr due to non-function symbol"
+ continue
+ fi
+
+ # if the user provided a size, make sure it matches the symbol's size
+ if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
+ [[ $print_warnings = 1 ]] &&
+ echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+ continue;
+ fi
+
+ # make sure the provided offset is within the symbol's range
+ if [[ $offset -gt $sym_size ]]; then
+ [[ $print_warnings = 1 ]] &&
+ echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+ continue
+ fi
+
+ # separate multiple entries with a blank line
+ [[ $FIRST = 0 ]] && echo
+ FIRST=0
+
+ local hexsize=0x$(printf %x $sym_size)
+ echo "$func+$offset/$hexsize:"
+ addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+ DONE=1
+
+ done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+}
+
+[[ $# -lt 2 ]] && usage
+
+objfile=$1
+[[ ! -f $objfile ]] && die "can't find objfile $objfile"
+shift
+
+DIR_PREFIX=supercalifragilisticexpialidocious
+find_dir_prefix $objfile
+
+FIRST=1
+while [[ $# -gt 0 ]]; do
+ func_addr=$1
+ shift
+
+ # print any matches found
+ DONE=0
+ __faddr2line $objfile $func_addr $DIR_PREFIX 0
+
+ # if no match was found, print warnings
+ if [[ $DONE = 0 ]]; then
+ __faddr2line $objfile $func_addr $DIR_PREFIX 1
+ warn "no match for $func_addr"
+ fi
+done
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 42396a7..a68f031 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -363,6 +363,7 @@
strcmp(".sched.text", txtname) == 0 ||
strcmp(".spinlock.text", txtname) == 0 ||
strcmp(".irqentry.text", txtname) == 0 ||
+ strcmp(".softirqentry.text", txtname) == 0 ||
strcmp(".kprobes.text", txtname) == 0 ||
strcmp(".text.unlikely", txtname) == 0;
}
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 96e2486..2d48011 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -134,6 +134,7 @@
".sched.text" => 1,
".spinlock.text" => 1,
".irqentry.text" => 1,
+ ".softirqentry.text" => 1,
".kprobes.text" => 1,
".text.unlikely" => 1,
);
diff --git a/scripts/ver_linux b/scripts/ver_linux
index 0d8bd29..430b201 100755
--- a/scripts/ver_linux
+++ b/scripts/ver_linux
@@ -1,193 +1,89 @@
-#!/bin/sh
+#!/bin/awk -f
# Before running this script please ensure that your PATH is
# typical as you use for compilation/installation. I use
# /bin /sbin /usr/bin /usr/sbin /usr/local/bin, but it may
# differ on your system.
-#
-echo 'If some fields are empty or look unusual you may have an old version.'
-echo 'Compare to the current minimal requirements in Documentation/Changes.'
-echo ' '
-uname -a
-echo ' '
+BEGIN {
+ usage = "If some fields are empty or look unusual you may have an old version.\n"
+ usage = usage "Compare to the current minimal requirements in Documentation/Changes.\n"
+ print usage
-gcc -dumpversion 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("GNU C\t\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
+ system("uname -a")
+ printf("\n")
-make --version 2>&1 |
-awk '/GNU Make/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("GNU Make\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
+ printversion("GNU C", version("gcc -dumpversion 2>&1"))
+ printversion("GNU Make", version("make --version 2>&1"))
+ printversion("Binutils", version("ld -v 2>&1"))
+ printversion("Util-linux", version("mount --version 2>&1"))
+ printversion("Mount", version("mount --version 2>&1"))
+ printversion("Module-init-tools", version("depmod -V 2>&1"))
+ printversion("E2fsprogs", version("tune2fs 2>&1"))
+ printversion("Jfsutils", version("fsck.jfs -V 2>&1"))
+ printversion("Reiserfsprogs", version("reiserfsck -V 2>&1"))
+ printversion("Reiser4fsprogs", version("fsck.reiser4 -V 2>&1"))
+ printversion("Xfsprogs", version("xfs_db -V 2>&1"))
+ printversion("Pcmciautils", version("pccardctl -V 2>&1"))
+ printversion("Pcmcia-cs", version("cardmgr -V 2>&1"))
+ printversion("Quota-tools", version("quota -V 2>&1"))
+ printversion("PPP", version("pppd --version 2>&1"))
+ printversion("Isdn4k-utils", version("isdnctrl 2>&1"))
+ printversion("Nfs-utils", version("showmount --version 2>&1"))
-ld -v 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Binutils\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-mount --version 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- $0 = substr($0,RSTART,RLENGTH)
- printf("Util-linux\t\t%s\nMount\t\t\t%s\n",$0,$0)
-}'
-
-depmod -V 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Module-init-tools\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-tune2fs 2>&1 |
-awk '/^tune2fs/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("E2fsprogs\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-fsck.jfs -V 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Jfsutils\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-reiserfsck -V 2>&1 |
-awk '/^reiserfsck/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Reiserfsprogs\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-fsck.reiser4 -V 2>&1 | grep ^fsck.reiser4 | awk \
-'NR==1{print "reiser4progs ", $2}'
-
-xfs_db -V 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Xfsprogs\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-pccardctl -V 2>&1 |
-awk '/pcmciautils/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Pcmciautils\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-cardmgr -V 2>&1| grep version | awk \
-'NR==1{print "pcmcia-cs ", $3}'
-
-quota -V 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Quota-tools\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-pppd --version 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("PPP\t\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-isdnctrl 2>&1 | grep version | awk \
-'NR==1{print "isdn4k-utils ", $NF}'
-
-showmount --version 2>&1 | grep nfs-utils | awk \
-'NR==1{print "nfs-utils ", $NF}'
-
-test -r /proc/self/maps &&
-sed '
- /.*libc-\(.*\)\.so$/!d
- s//Linux C Library\t\t\1/
- q
-' /proc/self/maps
-
-ldd --version 2>&1 |
-awk '/^ldd/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Dynamic linker (ldd)\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-libcpp=`ldconfig -p 2>/dev/null |
- awk '/(libg|stdc)[+]+\.so/ {
- print $NF
- exit
+ if (system("test -r /proc/self/maps") == 0) {
+ while (getline <"/proc/self/maps" > 0) {
+ n = split($0, procmaps, "/")
+ if (/libc.*so$/ && match(procmaps[n], /[0-9]+([.]?[0-9]+)+/)) {
+ ver = substr(procmaps[n], RSTART, RLENGTH)
+ printversion("Linux C Library", ver)
+ break
+ }
+ }
}
-'`
-test -r "$libcpp" &&
-ls -l $libcpp |
-sed '
- s!.*so\.!!
- s!^!Linux C++ Library\t!
-'
-ps --version 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Procps\t\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-ifconfig --version 2>&1 |
-awk '/tools/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Net-tools\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
+ printversion("Dynamic linker (ldd)", version("ldd --version 2>&1"))
-loadkeys -V 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- $0 = substr($0,RSTART,RLENGTH)
- printf("Kbd\t\t\t%s\nConsole-tools\t\t%s\n",$0,$0)
-}'
+ while ("ldconfig -p 2>/dev/null" | getline > 0) {
+ if (/(libg|stdc)[+]+\.so/) {
+ libcpp = $NF
+ break
+ }
+ }
+ if (system("test -r " libcpp) == 0)
+ printversion("Linux C++ Library", version("readlink " libcpp))
-oprofiled --version 2>&1 | awk \
-'(NR==1 && ($2 == "oprofile")) {print "oprofile ", $3}'
+ printversion("Procps", version("ps --version 2>&1"))
+ printversion("Net-tools", version("ifconfig --version 2>&1"))
+ printversion("Kbd", version("loadkeys -V 2>&1"))
+ printversion("Console-tools", version("loadkeys -V 2>&1"))
+ printversion("Oprofile", version("oprofiled --version 2>&1"))
+ printversion("Sh-utils", version("expr --v 2>&1"))
+ printversion("Udev", version("udevadm --version 2>&1"))
+ printversion("Wireless-tools", version("iwconfig --version 2>&1"))
-expr --v 2>&1 |
-awk '/^expr/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Sh-utils\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-udevadm --version 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Udev\t\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-iwconfig --version 2>&1 |
-awk '/version/{
- match($0, /[0-9]+([.]?[0-9]+)+/)
- printf("Wireless-tools\t\t%s\n",
- substr($0,RSTART,RLENGTH))
-}'
-
-test -e /proc/modules &&
-sort /proc/modules |
-sed '
- s/ .*//
- H
-${
- g
- s/^\n/Modules Loaded\t\t/
- y/\n/ /
- q
+ if (system("test -r /proc/modules") == 0) {
+ while ("sort /proc/modules" | getline > 0) {
+ mods = mods sep $1
+ sep = " "
+ }
+ printversion("Modules Loaded", mods)
+ }
}
- d
-'
+
+function version(cmd, ver) {
+ while (cmd | getline > 0) {
+ if (!/ver_linux/ && match($0, /[0-9]+([.]?[0-9]+)+/)) {
+ ver = substr($0, RSTART, RLENGTH)
+ break
+ }
+ }
+ close(cmd)
+ return ver
+}
+
+function printversion(name, value, ofmt) {
+ if (value != "") {
+ ofmt = "%-20s\t%s\n"
+ printf(ofmt, name, value)
+ }
+}
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 5adbfc3..17a0610 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -29,6 +29,7 @@
#include <linux/rcupdate.h>
#include <linux/scatterlist.h>
#include <linux/ctype.h>
+#include <crypto/aes.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <crypto/skcipher.h>
@@ -478,6 +479,7 @@
struct crypto_skcipher *tfm;
struct skcipher_request *req;
unsigned int encrypted_datalen;
+ u8 iv[AES_BLOCK_SIZE];
unsigned int padlen;
char pad[16];
int ret;
@@ -500,8 +502,8 @@
sg_init_table(sg_out, 1);
sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen);
- skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
- epayload->iv);
+ memcpy(iv, epayload->iv, sizeof(iv));
+ skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
ret = crypto_skcipher_encrypt(req);
tfm = crypto_skcipher_reqtfm(req);
skcipher_request_free(req);
@@ -581,6 +583,7 @@
struct crypto_skcipher *tfm;
struct skcipher_request *req;
unsigned int encrypted_datalen;
+ u8 iv[AES_BLOCK_SIZE];
char pad[16];
int ret;
@@ -599,8 +602,8 @@
epayload->decrypted_datalen);
sg_set_buf(&sg_out[1], pad, sizeof pad);
- skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
- epayload->iv);
+ memcpy(iv, epayload->iv, sizeof(iv));
+ skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
ret = crypto_skcipher_decrypt(req);
tfm = crypto_skcipher_reqtfm(req);
skcipher_request_free(req);
diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h
new file mode 100644
index 0000000..e38b64c
--- /dev/null
+++ b/tools/arch/alpha/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 6
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x10
+#define MAP_DENYWRITE 0x02000
+#define MAP_EXECUTABLE 0x04000
+#define MAP_FILE 0
+#define MAP_FIXED 0x100
+#define MAP_GROWSDOWN 0x01000
+#define MAP_HUGETLB 0x100000
+#define MAP_LOCKED 0x08000
+#define MAP_NONBLOCK 0x40000
+#define MAP_NORESERVE 0x10000
+#define MAP_POPULATE 0x20000
+#define MAP_PRIVATE 0x02
+#define MAP_SHARED 0x01
+#define MAP_STACK 0x80000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x8
+#define PROT_WRITE 0x2
+/* MADV_HWPOISON is undefined on alpha, fix it for perf */
+#define MADV_HWPOISON 100
+/* MADV_SOFT_OFFLINE is undefined on alpha, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on alpha, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on alpha, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/arch/arc/include/uapi/asm/mman.h b/tools/arch/arc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..aa3acd2
--- /dev/null
+++ b/tools/arch/arc/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/arm/include/uapi/asm/mman.h b/tools/arch/arm/include/uapi/asm/mman.h
new file mode 100644
index 0000000..478f699
--- /dev/null
+++ b/tools/arch/arm/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/arm64/include/uapi/asm/mman.h b/tools/arch/arm64/include/uapi/asm/mman.h
new file mode 100644
index 0000000..70fd311
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm64, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/frv/include/uapi/asm/mman.h b/tools/arch/frv/include/uapi/asm/mman.h
new file mode 100644
index 0000000..5be78ac
--- /dev/null
+++ b/tools/arch/frv/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on frv, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/h8300/include/uapi/asm/mman.h b/tools/arch/h8300/include/uapi/asm/mman.h
new file mode 100644
index 0000000..9d9ac54
--- /dev/null
+++ b/tools/arch/h8300/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on h8300, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/hexagon/include/uapi/asm/mman.h b/tools/arch/hexagon/include/uapi/asm/mman.h
new file mode 100644
index 0000000..102f3fa
--- /dev/null
+++ b/tools/arch/hexagon/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on hexagon, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h
new file mode 100644
index 0000000..1d6e5ac
--- /dev/null
+++ b/tools/arch/ia64/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on ia64, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/m32r/include/uapi/asm/mman.h b/tools/arch/m32r/include/uapi/asm/mman.h
new file mode 100644
index 0000000..1c29635
--- /dev/null
+++ b/tools/arch/m32r/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on m32r, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/microblaze/include/uapi/asm/mman.h b/tools/arch/microblaze/include/uapi/asm/mman.h
new file mode 100644
index 0000000..005cd50
--- /dev/null
+++ b/tools/arch/microblaze/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on microblaze, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h
new file mode 100644
index 0000000..c020529
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/mman.h
@@ -0,0 +1,46 @@
+#ifndef TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_HWPOISON 100
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x0800
+#define MAP_DENYWRITE 0x2000
+#define MAP_EXECUTABLE 0x4000
+#define MAP_FILE 0
+#define MAP_FIXED 0x010
+#define MAP_GROWSDOWN 0x1000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x8000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x0400
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x002
+#define MAP_SHARED 0x001
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x04
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x00
+#define PROT_READ 0x01
+#define PROT_SEM 0x10
+#define PROT_WRITE 0x02
+/* MADV_SOFT_OFFLINE is undefined on mips, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on mips, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on mips, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/arch/mn10300/include/uapi/asm/mman.h b/tools/arch/mn10300/include/uapi/asm/mman.h
new file mode 100644
index 0000000..c1ea36d
--- /dev/null
+++ b/tools/arch/mn10300/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on mn10300, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..03d8d5b
--- /dev/null
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 70
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 69
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 67
+#define MADV_MERGEABLE 65
+#define MADV_NOHUGEPAGE 68
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 66
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x10
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_FILE 0
+#define MAP_FIXED 0x04
+#define MAP_GROWSDOWN 0x8000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x2000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x4000
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x02
+#define MAP_SHARED 0x01
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x8
+#define PROT_WRITE 0x2
+/* MADV_HWPOISON is undefined on parisc, fix it for perf */
+#define MADV_HWPOISON 100
+/* MADV_SOFT_OFFLINE is undefined on parisc, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on parisc, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..761db43
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_GROWSDOWN 0x0100
+#define MAP_HUGETLB 0x40000
+#define MAP_LOCKED 0x80
+#define MAP_NONBLOCK 0x10000
+#define MAP_NORESERVE 0x40
+#define MAP_POPULATE 0x8000
+#define MAP_STACK 0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on powerpc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/s390/include/uapi/asm/mman.h b/tools/arch/s390/include/uapi/asm/mman.h
new file mode 100644
index 0000000..b03dea9
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on s390, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/score/include/uapi/asm/mman.h b/tools/arch/score/include/uapi/asm/mman.h
new file mode 100644
index 0000000..2f8fb89
--- /dev/null
+++ b/tools/arch/score/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on score, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/sh/include/uapi/asm/mman.h b/tools/arch/sh/include/uapi/asm/mman.h
new file mode 100644
index 0000000..26504f6
--- /dev/null
+++ b/tools/arch/sh/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on sh, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..8640525
--- /dev/null
+++ b/tools/arch/sparc/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_GROWSDOWN 0x0200
+#define MAP_HUGETLB 0x40000
+#define MAP_LOCKED 0x100
+#define MAP_NONBLOCK 0x10000
+#define MAP_NORESERVE 0x40
+#define MAP_POPULATE 0x8000
+#define MAP_STACK 0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on sparc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/tile/include/uapi/asm/mman.h b/tools/arch/tile/include/uapi/asm/mman.h
new file mode 100644
index 0000000..7116c4b
--- /dev/null
+++ b/tools/arch/tile/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_GROWSDOWN 0x0100
+#define MAP_HUGETLB 0x4000
+#define MAP_LOCKED 0x0200
+#define MAP_NONBLOCK 0x0080
+#define MAP_NORESERVE 0x0400
+#define MAP_POPULATE 0x0040
+#define MAP_STACK MAP_GROWSDOWN
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on tile, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/x86/include/uapi/asm/mman.h b/tools/arch/x86/include/uapi/asm/mman.h
new file mode 100644
index 0000000..b73c1af
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/mman.h
@@ -0,0 +1,5 @@
+#ifndef TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define MAP_32BIT 0x40
+#include <uapi/asm-generic/mman.h>
+#endif
diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h
new file mode 100644
index 0000000..4453195
--- /dev/null
+++ b/tools/arch/xtensa/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x0800
+#define MAP_DENYWRITE 0x2000
+#define MAP_EXECUTABLE 0x4000
+#define MAP_FILE 0
+#define MAP_FIXED 0x010
+#define MAP_GROWSDOWN 0x1000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x8000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x0400
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x002
+#define MAP_SHARED 0x001
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x10
+#define PROT_WRITE 0x2
+/* MADV_HWPOISON is undefined on xtensa, fix it for perf */
+#define MADV_HWPOISON 100
+/* MADV_SOFT_OFFLINE is undefined on xtensa, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on xtensa, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on xtensa, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 0d9f48e..bc7adb8 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1433,7 +1433,7 @@
openlog("KVP", 0, LOG_USER);
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
- kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR);
+ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
if (kvp_fd < 0) {
syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 5d51d6f..e082980 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -250,6 +250,9 @@
syslog(LOG_ERR, "/etc/fstab and /proc/mounts");
}
break;
+ case VSS_OP_HOT_BACKUP:
+ syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n");
+ break;
default:
syslog(LOG_ERR, "Illegal op:%d\n", op);
}
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
new file mode 100644
index 0000000..7d41026
--- /dev/null
+++ b/tools/include/linux/coresight-pmu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LINUX_CORESIGHT_PMU_H
+#define _LINUX_CORESIGHT_PMU_H
+
+#define CORESIGHT_ETM_PMU_NAME "cs_etm"
+#define CORESIGHT_ETM_PMU_SEED 0x10
+
+/* ETMv3.5/PTM's ETMCR config bit */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_TS 28
+
+static inline int coresight_get_trace_id(int cpu)
+{
+ /*
+ * A trace ID of value 0 is invalid, so let's start at some
+ * random value that fits in 7 bits and go from there. Since
+ * the common convention is to have data trace IDs be I(N) + 1,
+ * set instruction trace IDs as a function of the CPU number.
+ */
+ return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
+}
+
+#endif
diff --git a/tools/include/linux/time64.h b/tools/include/linux/time64.h
new file mode 100644
index 0000000..df92654
--- /dev/null
+++ b/tools/include/linux/time64.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_TIME64_H
+#define _TOOLS_LINUX_TIME64_H
+
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+#endif /* _LINUX_TIME64_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
new file mode 100644
index 0000000..5827438
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -0,0 +1,75 @@
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ 0x1 /* page can be read */
+#define PROT_WRITE 0x2 /* page can be written */
+#define PROT_EXEC 0x4 /* page can be executed */
+#define PROT_SEM 0x8 /* page may be used for atomic ops */
+#define PROT_NONE 0x0 /* page can not be accessed */
+#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_TYPE 0x0f /* Mask for type of mapping */
+#define MAP_FIXED 0x10 /* Interpret addr exactly */
+#define MAP_ANONYMOUS 0x20 /* don't use a file */
+#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
+# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */
+#else
+# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
+#endif
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */
+
+#define MS_ASYNC 1 /* sync memory asynchronously */
+#define MS_INVALIDATE 2 /* invalidate the caches */
+#define MS_SYNC 4 /* synchronous memory sync */
+
+#define MADV_NORMAL 0 /* no further special treatment */
+#define MADV_RANDOM 1 /* expect random page references */
+#define MADV_SEQUENTIAL 2 /* expect sequential page references */
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED 4 /* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE 8 /* free pages only if memory pressure */
+#define MADV_REMOVE 9 /* remove these pages & resources */
+#define MADV_DONTFORK 10 /* don't inherit across fork */
+#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_HWPOISON 100 /* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
+
+#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
+ overrides the coredump filter bits */
+#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
+
+/* compatibility flags */
+#define MAP_FILE 0
+
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT 26
+#define MAP_HUGE_MASK 0x3f
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
new file mode 100644
index 0000000..10fa785
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <uapi/asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
+#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+#define MAP_LOCKED 0x2000 /* pages are locked */
+#define MAP_NORESERVE 0x4000 /* don't check for reservations */
+#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+
+/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
+
+#define MCL_CURRENT 1 /* lock all current mappings */
+#define MCL_FUTURE 2 /* lock all future mappings */
+#define MCL_ONFAULT 4 /* lock all pages that are faulted in */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
new file mode 100644
index 0000000..81d8edf
--- /dev/null
+++ b/tools/include/uapi/linux/mman.h
@@ -0,0 +1,13 @@
+#ifndef _UAPI_LINUX_MMAN_H
+#define _UAPI_LINUX_MMAN_H
+
+#include <uapi/asm/mman.h>
+
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+
+#define OVERCOMMIT_GUESS 0
+#define OVERCOMMIT_ALWAYS 1
+#define OVERCOMMIT_NEVER 2
+
+#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ba7094b..f99f49e4 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -34,6 +34,10 @@
#define TRACEFS_MAGIC 0x74726163
#endif
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC 0x958458f6
+#endif
+
static const char * const sysfs__fs_known_mountpoints[] = {
"/sys",
0,
@@ -67,6 +71,10 @@
0,
};
+static const char * const hugetlbfs__known_mountpoints[] = {
+ 0,
+};
+
struct fs {
const char *name;
const char * const *mounts;
@@ -80,6 +88,7 @@
FS__PROCFS = 1,
FS__DEBUGFS = 2,
FS__TRACEFS = 3,
+ FS__HUGETLBFS = 4,
};
#ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
},
+ [FS__HUGETLBFS] = {
+ .name = "hugetlbfs",
+ .mounts = hugetlbfs__known_mountpoints,
+ .magic = HUGETLBFS_MAGIC,
+ },
};
static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@
FS(procfs, FS__PROCFS);
FS(debugfs, FS__DEBUGFS);
FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
int filename__read_int(const char *filename, int *value)
{
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 16c9c2e..a63269f 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -21,6 +21,7 @@
FS(procfs)
FS(debugfs)
FS(tracefs)
+FS(hugetlbfs)
#undef FS
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index bd09d0e..143b6cd 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -175,6 +175,7 @@
"__stack_chk_fail",
"panic",
"do_exit",
+ "do_task_dead",
"__module_put_and_exit",
"complete_and_exit",
"kvm_spurious_fault",
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 15949e2..cb081ac5 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -110,6 +110,14 @@
order = caller
sort-key = function
+ [report]
+ # Defaults
+ sort-order = comm,dso,symbol
+ percent-limit = 0
+ queue-size = 0
+ children = true
+ group = true
+
Variables
~~~~~~~~~
@@ -382,6 +390,10 @@
histogram entry. Default is 0 which means no limitation.
report.*::
+ report.sort_order::
+ Allows changing the default sort order from "comm,dso,symbol" to
+ some other default, for instance "sym,dso" may be more fitting for
+ kernel developers.
report.percent-limit::
This one is mostly the same as call-graph.threshold but works for
histogram entries. Entries having an overhead lower than this
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index b303bcd..e6c9902 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -21,6 +21,8 @@
'perf probe' [options] --vars='PROBEPOINT'
or
'perf probe' [options] --funcs
+or
+'perf probe' [options] --definition='PROBE' [...]
DESCRIPTION
-----------
@@ -34,6 +36,8 @@
-k::
--vmlinux=PATH::
Specify vmlinux path which has debuginfo (Dwarf binary).
+ Only when using this with --definition, you can give an offline
+ vmlinux file.
-m::
--module=MODNAME|PATH::
@@ -96,6 +100,11 @@
can also list functions in a user space executable / shared library.
This also can accept a FILTER rule argument.
+-D::
+--definition=::
+ Show trace-event definition converted from given probe-event instead
+ of write it into tracing/[k,u]probe_events.
+
--filter=FILTER::
(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
pattern, see FILTER PATTERN for detail.
@@ -176,13 +185,12 @@
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
-
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
TYPES
-----
-Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe.
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected).
String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 379a2be..9233519 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -35,15 +35,15 @@
- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
- /sys/bus/event_sources/devices/<pmu>/format/*.
+ /sys/bus/event_source/devices/<pmu>/format/*.
- a symbolically formed event like 'pmu/config=M,config1=N,config3=K/'
where M, N, K are numbers (in decimal, hex, octal format). Acceptable
values for each of 'config', 'config1' and 'config2' are defined by
- corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/*
+ corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
param1 and param2 are defined as formats for the PMU in:
- /sys/bus/event_sources/devices/<pmu>/format/*
+ /sys/bus/event_source/devices/<pmu>/format/*
There are also some params which are not defined in .../<pmu>/format/*.
These params can be used to overload default config values per event.
@@ -60,6 +60,18 @@
Note: If user explicitly sets options which conflict with the params,
the value set by the params will be overridden.
+ Also not defined in .../<pmu>/format/* are PMU driver specific
+ configuration parameters. Any configuration parameter preceded by
+ the letter '@' is not interpreted in user space and sent down directly
+ to the PMU driver. For example:
+
+ perf record -e some_event/@cfg1,@cfg2=config/ ...
+
+ will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated
+ with the event for further processing. There is no restriction on
+ what the configuration parameters are, as long as their semantic is
+ understood and supported by the PMU driver.
+
- a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
where addr is the address in memory you want to break in.
Access is the memory access type (read, write, execute) it can
@@ -77,9 +89,62 @@
--filter=<filter>::
Event filter. This option should follow a event selector (-e) which
- selects tracepoint event(s). Multiple '--filter' options are combined
+ selects either tracepoint event(s) or a hardware trace PMU
+ (e.g. Intel PT or CoreSight).
+
+ - tracepoint filters
+
+ In the case of tracepoints, multiple '--filter' options are combined
using '&&'.
+ - address filters
+
+ A hardware trace PMU advertises its ability to accept a number of
+ address filters by specifying a non-zero value in
+ /sys/bus/event_source/devices/<pmu>/nr_addr_filters.
+
+ Address filters have the format:
+
+ filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
+
+ Where:
+ - 'filter': defines a region that will be traced.
+ - 'start': defines an address at which tracing will begin.
+ - 'stop': defines an address at which tracing will stop.
+ - 'tracestop': defines a region in which tracing will stop.
+
+ <file name> is the name of the object file, <start> is the offset to the
+ code to trace in that file, and <size> is the size of the region to
+ trace. 'start' and 'stop' filters need not specify a <size>.
+
+ If no object file is specified then the kernel is assumed, in which case
+ the start address must be a current kernel memory address.
+
+ <start> can also be specified by providing the name of a symbol. If the
+ symbol name is not unique, it can be disambiguated by inserting #n where
+ 'n' selects the n'th symbol in address order. Alternately #0, #g or #G
+ select only a global symbol. <size> can also be specified by providing
+ the name of a symbol, in which case the size is calculated to the end
+ of that symbol. For 'filter' and 'tracestop' filters, if <size> is
+ omitted and <start> is a symbol, then the size is calculated to the end
+ of that symbol.
+
+ If <size> is omitted and <start> is '*', then the start and size will
+ be calculated from the first and last symbols, i.e. to trace the whole
+ file.
+
+ If symbol names (or '*') are provided, they must be surrounded by white
+ space.
+
+ The filter passed to the kernel is not necessarily the same as entered.
+ To see the filter that is passed, use the -v option.
+
+ The kernel may not be able to configure a trace region if it is not
+ within a single mapping. MMAP events (or /proc/<pid>/maps) can be
+ examined to determine if that is a possibility.
+
+ Multiple filters can be separated with space or comma.
+
--exclude-perf::
Don't record events issued by perf itself. This option should follow
a event selector (-e) which selects tracepoint event(s). It adds a
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fdc99fe..b664b18 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -437,6 +437,10 @@
quipper
The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
+https://chromium.googlesource.com/chromiumos/platform2
+
+It is under the chromiumos-wide-profiling/ subdirectory. This library can
+convert a perf data file to a protobuf and vice versa.
+
Unfortunately this parser tends to be many versions behind and may not be able
to parse data files generated by recent perf.
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
index 1d8d5bc..2b477c1 100644
--- a/tools/perf/Documentation/perfconfig.example
+++ b/tools/perf/Documentation/perfconfig.example
@@ -27,3 +27,12 @@
use_offset = true
jump_arrows = true
show_nr_jumps = false
+
+[report]
+
+ # Defaults
+ sort-order = comm,dso,symbol
+ percent-limit = 0
+ queue-size = 0
+ children = true
+ group = true
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index ad2534d..0bda2cc 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -60,14 +60,18 @@
tools/include/linux/atomic.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
+tools/include/linux/coresight-pmu.h
tools/include/linux/filter.h
tools/include/linux/hash.h
tools/include/linux/kernel.h
tools/include/linux/list.h
tools/include/linux/log2.h
+tools/include/uapi/asm-generic/mman-common.h
+tools/include/uapi/asm-generic/mman.h
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/bpf_common.h
tools/include/uapi/linux/hw_breakpoint.h
+tools/include/uapi/linux/mman.h
tools/include/uapi/linux/perf_event.h
tools/include/linux/poison.h
tools/include/linux/rbtree.h
@@ -77,4 +81,6 @@
tools/include/linux/types.h
tools/include/linux/err.h
tools/include/linux/bitmap.h
+tools/include/linux/time64.h
+tools/arch/*/include/uapi/asm/mman.h
tools/arch/*/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 24803c5..72edf83 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -746,10 +746,13 @@
endif
ifndef NO_AUXTRACE
- ifeq ($(feature-get_cpuid), 0)
- msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
- NO_AUXTRACE := 1
- else
+ ifeq ($(ARCH),x86)
+ ifeq ($(feature-get_cpuid), 0)
+ msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+ NO_AUXTRACE := 1
+ endif
+ endif
+ ifndef NO_AUXTRACE
$(call detected,CONFIG_AUXTRACE)
CFLAGS += -DHAVE_AUXTRACE_SUPPORT
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2d90875..d710db1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -165,7 +165,7 @@
# non-config cases
config := 1
-NON_CONFIG_TARGETS := clean TAGS tags cscope help
+NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -429,6 +429,18 @@
@(test -f ../../include/asm-generic/bitops/fls64.h && ( \
(diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true
+ @(test -f ../../include/linux/coresight-pmu.h && ( \
+ (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \
+ || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true
+ @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \
+ (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \
+ || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true
+ @(test -f ../../include/uapi/asm-generic/mman.h && ( \
+ (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \
+ || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true
+ @(test -f ../../include/uapi/linux/mman.h && ( \
+ (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \
+ || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
$(Q)$(MAKE) $(build)=perf
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
diff --git a/tools/perf/arch/arm/include/dwarf-regs-table.h b/tools/perf/arch/arm/include/dwarf-regs-table.h
new file mode 100644
index 0000000..f298d03
--- /dev/null
+++ b/tools/perf/arch/arm/include/dwarf-regs-table.h
@@ -0,0 +1,9 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const arm_regstr_tbl[] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4",
+ "%r5", "%r6", "%r7", "%r8", "%r9", "%r10",
+ "%fp", "%ip", "%sp", "%lr", "%pc",
+};
+#endif
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index f98da17..e64c5f2 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -2,3 +2,5 @@
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
new file mode 100644
index 0000000..8edf2cb
--- /dev/null
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdbool.h>
+#include <linux/coresight-pmu.h>
+
+#include "../../util/auxtrace.h"
+#include "../../util/evlist.h"
+#include "../../util/pmu.h"
+#include "cs-etm.h"
+
+struct auxtrace_record
+*auxtrace_record__init(struct perf_evlist *evlist, int *err)
+{
+ struct perf_pmu *cs_etm_pmu;
+ struct perf_evsel *evsel;
+ bool found_etm = false;
+
+ cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+ if (evlist) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (cs_etm_pmu &&
+ evsel->attr.type == cs_etm_pmu->type)
+ found_etm = true;
+ }
+ }
+
+ if (found_etm)
+ return cs_etm_record_init(err);
+
+ /*
+ * Clear 'err' even if we haven't found a cs_etm event - that way perf
+ * record can still be used even if tracers aren't present. The NULL
+ * return value will take care of telling the infrastructure HW tracing
+ * isn't available.
+ */
+ *err = 0;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
new file mode 100644
index 0000000..47d584d
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -0,0 +1,617 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <api/fs/fs.h>
+#include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include "cs-etm.h"
+#include "../../perf.h"
+#include "../../util/auxtrace.h"
+#include "../../util/cpumap.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/pmu.h"
+#include "../../util/thread_map.h"
+#include "../../util/cs-etm.h"
+
+#include <stdlib.h>
+
+#define ENABLE_SINK_MAX 128
+#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
+
+struct cs_etm_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *cs_etm_pmu;
+ struct perf_evlist *evlist;
+ bool snapshot_mode;
+ size_t snapshot_size;
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+
+static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+ ptr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+static int cs_etm_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+ struct perf_evsel *evsel, *cs_etm_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+
+ ptr->evlist = evlist;
+ ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == cs_etm_pmu->type) {
+ if (cs_etm_evsel) {
+ pr_err("There may be only one %s event\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ cs_etm_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ /* no need to continue if at least one event of interest was found */
+ if (!cs_etm_evsel)
+ return 0;
+
+ if (opts->use_clockid) {
+ pr_err("Cannot use clockid (-k option) with %s\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+
+ /* we are in snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ /*
+ * No size were given to '-S' or '-m,', so go with
+ * the default
+ */
+ if (!opts->auxtrace_snapshot_size &&
+ !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages =
+ KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ /*
+ * '-m,xyz' was specified but no snapshot size, so make the
+ * snapshot size as big as the auxtrace mmap area.
+ */
+ if (!opts->auxtrace_snapshot_size) {
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ }
+
+ /*
+ * -Sxyz was specified but no auxtrace mmap area, so make the
+ * auxtrace mmap area big enough to fit the requested snapshot
+ * size.
+ */
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+
+ /* Snapshost size can't be bigger than the auxtrace area */
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+
+ /* Something went wrong somewhere - this shouldn't happen */
+ if (!opts->auxtrace_snapshot_size ||
+ !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ }
+
+ /* We are in full trace mode but '-m,xyz' wasn't specified */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ }
+
+ /* Validate auxtrace_mmap_pages provided by user */
+ if (opts->auxtrace_mmap_pages) {
+ unsigned int max_page = (KiB(128) / page_size);
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+ if (!privileged &&
+ opts->auxtrace_mmap_pages > max_page) {
+ opts->auxtrace_mmap_pages = max_page;
+ pr_err("auxtrace too big, truncating to %d\n",
+ max_page);
+ }
+
+ if (!is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for %s: must be a power of 2\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode)
+ pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
+ opts->auxtrace_snapshot_size);
+
+ if (cs_etm_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace
+ * event must come first.
+ */
+ perf_evlist__to_front(evlist, cs_etm_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ }
+
+ return 0;
+}
+
+static u64 cs_etm_get_config(struct auxtrace_record *itr)
+{
+ u64 config = 0;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+ struct perf_evlist *evlist = ptr->evlist;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == cs_etm_pmu->type) {
+ /*
+ * Variable perf_event_attr::config is assigned to
+ * ETMv3/PTM. The bit fields have been made to match
+ * the ETMv3.5 ETRMCR register specification. See the
+ * PMU_FORMAT_ATTR() declarations in
+ * drivers/hwtracing/coresight/coresight-perf.c for
+ * details.
+ */
+ config = evsel->attr.config;
+ break;
+ }
+ }
+
+ return config;
+}
+
+static size_t
+cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ int i;
+ int etmv3 = 0, etmv4 = 0;
+ const struct cpu_map *cpus = evlist->cpus;
+
+ /* cpu map is not empty, we have specific CPUs to work with */
+ if (!cpu_map__empty(cpus)) {
+ for (i = 0; i < cpu_map__nr(cpus); i++) {
+ if (cs_etm_is_etmv4(itr, cpus->map[i]))
+ etmv4++;
+ else
+ etmv3++;
+ }
+ } else {
+ /* get configuration for all CPUs in the system */
+ for (i = 0; i < cpu__max_cpu(); i++) {
+ if (cs_etm_is_etmv4(itr, i))
+ etmv4++;
+ else
+ etmv3++;
+ }
+ }
+
+ return (CS_ETM_HEADER_SIZE +
+ (etmv4 * CS_ETMV4_PRIV_SIZE) +
+ (etmv3 * CS_ETMV3_PRIV_SIZE));
+}
+
+static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
+ [CS_ETM_ETMCCER] = "mgmt/etmccer",
+ [CS_ETM_ETMIDR] = "mgmt/etmidr",
+};
+
+static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+ [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
+ [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
+ [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
+ [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
+ [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
+{
+ bool ret = false;
+ char path[PATH_MAX];
+ int scan;
+ unsigned int val;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ /* Take any of the RO files for ETMv4 and see if it present */
+ snprintf(path, PATH_MAX, "cpu%d/%s",
+ cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+ scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
+
+ /* The file was read successfully, we have a winner */
+ if (scan == 1)
+ ret = true;
+
+ return ret;
+}
+
+static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
+{
+ char pmu_path[PATH_MAX];
+ int scan;
+ unsigned int val = 0;
+
+ /* Get RO metadata from sysfs */
+ snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
+
+ scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val);
+ if (scan != 1)
+ pr_err("%s: error reading: %s\n", __func__, pmu_path);
+
+ return val;
+}
+
+static void cs_etm_get_metadata(int cpu, u32 *offset,
+ struct auxtrace_record *itr,
+ struct auxtrace_info_event *info)
+{
+ u32 increment;
+ u64 magic;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ /* first see what kind of tracer this cpu is affined to */
+ if (cs_etm_is_etmv4(itr, cpu)) {
+ magic = __perf_cs_etmv4_magic;
+ /* Get trace configuration register */
+ info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
+ cs_etm_get_config(itr);
+ /* Get traceID from the framework */
+ info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
+ coresight_get_trace_id(cpu);
+ /* Get read-only information from sysFS */
+ info->priv[*offset + CS_ETMV4_TRCIDR0] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+ info->priv[*offset + CS_ETMV4_TRCIDR1] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
+ info->priv[*offset + CS_ETMV4_TRCIDR2] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+ info->priv[*offset + CS_ETMV4_TRCIDR8] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
+ info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro
+ [CS_ETMV4_TRCAUTHSTATUS]);
+
+ /* How much space was used */
+ increment = CS_ETMV4_PRIV_MAX;
+ } else {
+ magic = __perf_cs_etmv3_magic;
+ /* Get configuration register */
+ info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+ /* Get traceID from the framework */
+ info->priv[*offset + CS_ETM_ETMTRACEIDR] =
+ coresight_get_trace_id(cpu);
+ /* Get read-only information from sysFS */
+ info->priv[*offset + CS_ETM_ETMCCER] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv3_ro[CS_ETM_ETMCCER]);
+ info->priv[*offset + CS_ETM_ETMIDR] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv3_ro[CS_ETM_ETMIDR]);
+
+ /* How much space was used */
+ increment = CS_ETM_PRIV_MAX;
+ }
+
+ /* Build generic header portion */
+ info->priv[*offset + CS_ETM_MAGIC] = magic;
+ info->priv[*offset + CS_ETM_CPU] = cpu;
+ /* Where the next CPU entry should start from */
+ *offset += increment;
+}
+
+static int cs_etm_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *info,
+ size_t priv_size)
+{
+ int i;
+ u32 offset;
+ u64 nr_cpu, type;
+ const struct cpu_map *cpus = session->evlist->cpus;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ /* If the cpu_map is empty all CPUs are involved */
+ nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus);
+ /* Get PMU type as dynamically assigned by the core */
+ type = cs_etm_pmu->type;
+
+ /* First fill out the session header */
+ info->type = PERF_AUXTRACE_CS_ETM;
+ info->priv[CS_HEADER_VERSION_0] = 0;
+ info->priv[CS_PMU_TYPE_CPUS] = type << 32;
+ info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
+ info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
+
+ offset = CS_ETM_SNAPSHOT + 1;
+
+ /* cpu map is not empty, we have specific CPUs to work with */
+ if (!cpu_map__empty(cpus)) {
+ for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++)
+ cs_etm_get_metadata(cpus->map[i], &offset, itr, info);
+ } else {
+ /* get configuration for all CPUs in the system */
+ for (i = 0; i < cpu__max_cpu(); i++)
+ cs_etm_get_metadata(i, &offset, itr, info);
+ }
+
+ return 0;
+}
+
+static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
+ int idx, struct auxtrace_mmap *mm,
+ unsigned char *data __maybe_unused,
+ u64 *head, u64 *old)
+{
+ pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+ *old = *head;
+ *head += mm->len;
+
+ return 0;
+}
+
+static int cs_etm_snapshot_start(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evsel__disable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int cs_etm_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evsel__enable(evsel);
+ }
+ return -EINVAL;
+}
+
+static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return (((u64) rand() << 0) & 0x00000000FFFFFFFFull) |
+ (((u64) rand() << 32) & 0xFFFFFFFF00000000ull);
+}
+
+static void cs_etm_recording_free(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ free(ptr);
+}
+
+static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evlist__enable_event_idx(ptr->evlist,
+ evsel, idx);
+ }
+
+ return -EINVAL;
+}
+
+struct auxtrace_record *cs_etm_record_init(int *err)
+{
+ struct perf_pmu *cs_etm_pmu;
+ struct cs_etm_recording *ptr;
+
+ cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+ if (!cs_etm_pmu) {
+ *err = -EINVAL;
+ goto out;
+ }
+
+ ptr = zalloc(sizeof(struct cs_etm_recording));
+ if (!ptr) {
+ *err = -ENOMEM;
+ goto out;
+ }
+
+ ptr->cs_etm_pmu = cs_etm_pmu;
+ ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
+ ptr->itr.recording_options = cs_etm_recording_options;
+ ptr->itr.info_priv_size = cs_etm_info_priv_size;
+ ptr->itr.info_fill = cs_etm_info_fill;
+ ptr->itr.find_snapshot = cs_etm_find_snapshot;
+ ptr->itr.snapshot_start = cs_etm_snapshot_start;
+ ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
+ ptr->itr.reference = cs_etm_reference;
+ ptr->itr.free = cs_etm_recording_free;
+ ptr->itr.read_finish = cs_etm_read_finish;
+
+ *err = 0;
+ return &ptr->itr;
+out:
+ return NULL;
+}
+
+static FILE *cs_device__open_file(const char *name)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs;
+
+ sysfs = sysfs__mountpoint();
+ if (!sysfs)
+ return NULL;
+
+ snprintf(path, PATH_MAX,
+ "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
+
+ printf("path: %s\n", path);
+
+ if (stat(path, &st) < 0)
+ return NULL;
+
+ return fopen(path, "w");
+
+}
+
+static __attribute__((format(printf, 2, 3)))
+int cs_device__print_file(const char *name, const char *fmt, ...)
+{
+ va_list args;
+ FILE *file;
+ int ret = -EINVAL;
+
+ va_start(args, fmt);
+ file = cs_device__open_file(name);
+ if (file) {
+ ret = vfprintf(file, fmt, args);
+ fclose(file);
+ }
+ va_end(args);
+ return ret;
+}
+
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
+{
+ int ret;
+ char enable_sink[ENABLE_SINK_MAX];
+
+ snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
+ term->val.drv_cfg, "enable_sink");
+
+ ret = cs_device__print_file(enable_sink, "%d", 1);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h
new file mode 100644
index 0000000..5256741
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INCLUDE__PERF_CS_ETM_H__
+#define INCLUDE__PERF_CS_ETM_H__
+
+#include "../../util/evsel.h"
+
+struct auxtrace_record *cs_etm_record_init(int *err);
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
+
+#endif
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
new file mode 100644
index 0000000..98d6739
--- /dev/null
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <linux/coresight-pmu.h>
+#include <linux/perf_event.h>
+
+#include "cs-etm.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr
+*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+ if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
+ /* add ETM default config here */
+ pmu->selectable = true;
+ pmu->set_drv_config = cs_etm_set_drv_config;
+ }
+#endif
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h
new file mode 100644
index 0000000..2675936
--- /dev/null
+++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h
@@ -0,0 +1,13 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const aarch64_regstr_tbl[] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4",
+ "%r5", "%r6", "%r7", "%r8", "%r9",
+ "%r10", "%r11", "%r12", "%r13", "%r14",
+ "%r15", "%r16", "%r17", "%r18", "%r19",
+ "%r20", "%r21", "%r22", "%r23", "%r24",
+ "%r25", "%r26", "%r27", "%r28", "%r29",
+ "%lr", "%sp",
+};
+#endif
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 02f41db..cef6fb3 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,2 +1,6 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+
+libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
+ ../../arm/util/auxtrace.o \
+ ../../arm/util/cs-etm.o
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
index 54afe4a..db52fa2 100644
--- a/tools/perf/arch/powerpc/Build
+++ b/tools/perf/arch/powerpc/Build
@@ -1 +1,2 @@
libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
new file mode 100644
index 0000000..84d8ded
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,13 @@
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/dwarf-regs-table.h b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..db4730f
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
@@ -0,0 +1,27 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
+ */
+#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg
+
+static const char * const powerpc_regstr_tbl[] = {
+ "%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4",
+ "%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9",
+ "%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14",
+ "%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19",
+ "%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24",
+ "%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29",
+ "%gpr30", "%gpr31",
+ REG_DWARFNUM_NAME(msr, 66),
+ REG_DWARFNUM_NAME(ctr, 109),
+ REG_DWARFNUM_NAME(link, 108),
+ REG_DWARFNUM_NAME(xer, 101),
+ REG_DWARFNUM_NAME(dar, 119),
+ REG_DWARFNUM_NAME(dsisr, 118),
+};
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 75de0e9..c12f4e8 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -5,6 +5,8 @@
#include <linux/types.h>
#include <asm/perf_regs.h>
+void perf_regs_load(u64 *regs);
+
#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_POWERPC_MAX
#ifdef __powerpc64__
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build
new file mode 100644
index 0000000..d827ef3
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 0000000..e24f462
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,15 @@
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "Test dwarf unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 0000000..0bac313
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S
new file mode 100644
index 0000000..d76c9a3
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/regs_load.S
@@ -0,0 +1,94 @@
+#include <linux/linkage.h>
+
+/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. */
+#define R0 0
+#define R1 1 * 8
+#define R2 2 * 8
+#define R3 3 * 8
+#define R4 4 * 8
+#define R5 5 * 8
+#define R6 6 * 8
+#define R7 7 * 8
+#define R8 8 * 8
+#define R9 9 * 8
+#define R10 10 * 8
+#define R11 11 * 8
+#define R12 12 * 8
+#define R13 13 * 8
+#define R14 14 * 8
+#define R15 15 * 8
+#define R16 16 * 8
+#define R17 17 * 8
+#define R18 18 * 8
+#define R19 19 * 8
+#define R20 20 * 8
+#define R21 21 * 8
+#define R22 22 * 8
+#define R23 23 * 8
+#define R24 24 * 8
+#define R25 25 * 8
+#define R26 26 * 8
+#define R27 27 * 8
+#define R28 28 * 8
+#define R29 29 * 8
+#define R30 30 * 8
+#define R31 31 * 8
+#define NIP 32 * 8
+#define CTR 35 * 8
+#define LINK 36 * 8
+#define XER 37 * 8
+
+.globl perf_regs_load
+perf_regs_load:
+ std 0, R0(3)
+ std 1, R1(3)
+ std 2, R2(3)
+ std 3, R3(3)
+ std 4, R4(3)
+ std 5, R5(3)
+ std 6, R6(3)
+ std 7, R7(3)
+ std 8, R8(3)
+ std 9, R9(3)
+ std 10, R10(3)
+ std 11, R11(3)
+ std 12, R12(3)
+ std 13, R13(3)
+ std 14, R14(3)
+ std 15, R15(3)
+ std 16, R16(3)
+ std 17, R17(3)
+ std 18, R18(3)
+ std 19, R19(3)
+ std 20, R20(3)
+ std 21, R21(3)
+ std 22, R22(3)
+ std 23, R23(3)
+ std 24, R24(3)
+ std 25, R25(3)
+ std 26, R26(3)
+ std 27, R27(3)
+ std 28, R28(3)
+ std 29, R29(3)
+ std 30, R30(3)
+ std 31, R31(3)
+
+ /* store NIP */
+ mflr 4
+ std 4, NIP(3)
+
+ /* Store LR */
+ std 4, LINK(3)
+
+ /* Store XER */
+ mfxer 4
+ std 4, XER(3)
+
+ /* Store CTR */
+ mfctr 4
+ std 4, CTR(3)
+
+ /* Restore original value of r4 */
+ ld 4, R4(3)
+
+ blr
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 35745a7..ed9d5d1 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -108,7 +108,7 @@
int i = 0;
map = get_target_map(pev->target, pev->uprobes);
- if (!map || map__load(map, NULL) < 0)
+ if (!map || map__load(map) < 0)
return;
for (i = 0; i < ntevs; i++) {
diff --git a/tools/perf/arch/s390/include/dwarf-regs-table.h b/tools/perf/arch/s390/include/dwarf-regs-table.h
new file mode 100644
index 0000000..9da74a9
--- /dev/null
+++ b/tools/perf/arch/s390/include/dwarf-regs-table.h
@@ -0,0 +1,8 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const s390_regstr_tbl[] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+#endif
diff --git a/tools/perf/arch/sh/include/dwarf-regs-table.h b/tools/perf/arch/sh/include/dwarf-regs-table.h
new file mode 100644
index 0000000..3a2deaf
--- /dev/null
+++ b/tools/perf/arch/sh/include/dwarf-regs-table.h
@@ -0,0 +1,25 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+const char * const sh_regstr_tbl[] = {
+ "r0",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5",
+ "r6",
+ "r7",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+ "pc",
+ "pr",
+};
+
+#endif
diff --git a/tools/perf/arch/sparc/include/dwarf-regs-table.h b/tools/perf/arch/sparc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..12c0761
--- /dev/null
+++ b/tools/perf/arch/sparc/include/dwarf-regs-table.h
@@ -0,0 +1,18 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const sparc_regstr_tbl[] = {
+ "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+#endif
diff --git a/tools/perf/arch/x86/include/dwarf-regs-table.h b/tools/perf/arch/x86/include/dwarf-regs-table.h
new file mode 100644
index 0000000..9b5e5cb
--- /dev/null
+++ b/tools/perf/arch/x86/include/dwarf-regs-table.h
@@ -0,0 +1,14 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const x86_32_regstr_tbl[] = {
+ "%ax", "%cx", "%dx", "%bx", "$stack",/* Stack address instead of %sp */
+ "%bp", "%si", "%di",
+};
+
+static const char * const x86_64_regstr_tbl[] = {
+ "%ax", "%dx", "%cx", "%bx", "%si", "%di",
+ "%bp", "%sp", "%r8", "%r9", "%r10", "%r11",
+ "%r12", "%r13", "%r14", "%r15",
+};
+#endif
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a2412e9..90fa228 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -62,6 +62,7 @@
size_t snapshot_ref_buf_size;
int snapshot_ref_cnt;
struct intel_pt_snapshot_ref *snapshot_refs;
+ size_t priv_size;
};
static int intel_pt_parse_terms_with_default(struct list_head *formats,
@@ -273,11 +274,37 @@
return attr;
}
-static size_t
-intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
- struct perf_evlist *evlist __maybe_unused)
+static const char *intel_pt_find_filter(struct perf_evlist *evlist,
+ struct perf_pmu *intel_pt_pmu)
{
- return INTEL_PT_AUXTRACE_PRIV_SIZE;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type)
+ return evsel->filter;
+ }
+
+ return NULL;
+}
+
+static size_t intel_pt_filter_bytes(const char *filter)
+{
+ size_t len = filter ? strlen(filter) : 0;
+
+ return len ? roundup(len + 1, 8) : 0;
+}
+
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);
+
+ ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
+ intel_pt_filter_bytes(filter);
+
+ return ptr->priv_size;
}
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
@@ -302,9 +329,13 @@
bool cap_user_time_zero = false, per_cpu_mmaps;
u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+ unsigned long max_non_turbo_ratio;
+ size_t filter_str_len;
+ const char *filter;
+ u64 *info;
int err;
- if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
+ if (priv_size != ptr->priv_size)
return -EINVAL;
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
@@ -317,6 +348,13 @@
intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
+ if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
+ "%lu", &max_non_turbo_ratio) != 1)
+ max_non_turbo_ratio = 0;
+
+ filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
+ filter_str_len = filter ? strlen(filter) : 0;
+
if (!session->evlist->nr_mmaps)
return -EINVAL;
@@ -351,6 +389,17 @@
auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
+ auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;
+
+ info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+
+ if (filter_str_len) {
+ size_t len = intel_pt_filter_bytes(filter);
+
+ strncpy((char *)info, filter, len);
+ info += len >> 3;
+ }
return 0;
}
diff --git a/tools/perf/arch/xtensa/include/dwarf-regs-table.h b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
new file mode 100644
index 0000000..aa0444a
--- /dev/null
+++ b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
@@ -0,0 +1,8 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const xtensa_regstr_tbl[] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+#endif
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index f96e22e..2b9705a 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -16,6 +16,7 @@
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/time64.h>
#include <errno.h>
#include "bench.h"
#include "futex.h"
@@ -62,7 +63,7 @@
printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
requeued_avg,
nthreads,
- requeuetime_avg/1e3,
+ requeuetime_avg / USEC_PER_MSEC,
rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
}
@@ -184,7 +185,7 @@
if (!silent) {
printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
- j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
+ j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
}
/* everybody should be blocked on futex2, wake'em up */
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4a2ecd7..2c8fa67 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -15,6 +15,7 @@
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/time64.h>
#include <errno.h>
#include "bench.h"
#include "futex.h"
@@ -156,7 +157,7 @@
printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
"in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
- nblocked_threads, waketime_avg/1e3,
+ nblocked_threads, waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
}
@@ -172,7 +173,7 @@
printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
wakeup_avg,
nblocked_threads,
- waketime_avg/1e3,
+ waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
}
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 87d8f4f..e246b1b 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -16,6 +16,7 @@
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/time64.h>
#include <errno.h>
#include "bench.h"
#include "futex.h"
@@ -81,7 +82,7 @@
printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
wakeup_avg,
nthreads,
- waketime_avg/1e3,
+ waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
}
@@ -182,7 +183,7 @@
if (!silent) {
printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
- j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
+ j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
}
for (i = 0; i < nthreads; i++) {
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 2b54d0f..c684910 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -21,6 +21,7 @@
#include <string.h>
#include <sys/time.h>
#include <errno.h>
+#include <linux/time64.h>
#define K 1024
@@ -89,7 +90,7 @@
static double timeval2double(struct timeval *ts)
{
- return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
+ return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
}
#define print_bps(x) do { \
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f7f5300..8efe904 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -30,6 +30,7 @@
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>
+#include <linux/time64.h>
#include <numa.h>
#include <numaif.h>
@@ -1004,7 +1005,7 @@
if (strong && process_groups == g->p.nr_proc) {
if (!*convergence) {
*convergence = runtime_ns_max;
- tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+ tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
if (g->p.measure_convergence) {
g->all_converged = true;
g->stop_work = true;
@@ -1012,7 +1013,7 @@
}
} else {
if (*convergence) {
- tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+ tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
*convergence = 0;
}
tprintf("\n");
@@ -1022,7 +1023,7 @@
static void show_summary(double runtime_ns_max, int l, double *convergence)
{
tprintf("\r # %5.1f%% [%.1f mins]",
- (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+ (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
calc_convergence(runtime_ns_max, convergence);
@@ -1179,8 +1180,8 @@
if (details >= 3) {
timersub(&stop, &start, &diff);
- runtime_ns_max = diff.tv_sec * 1000000000;
- runtime_ns_max += diff.tv_usec * 1000;
+ runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
if (details >= 0) {
printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
@@ -1192,23 +1193,23 @@
continue;
timersub(&stop, &start0, &diff);
- runtime_ns_max = diff.tv_sec * 1000000000ULL;
- runtime_ns_max += diff.tv_usec * 1000ULL;
+ runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
show_summary(runtime_ns_max, l, &convergence);
}
gettimeofday(&stop, NULL);
timersub(&stop, &start0, &diff);
- td->runtime_ns = diff.tv_sec * 1000000000ULL;
- td->runtime_ns += diff.tv_usec * 1000ULL;
- td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+ td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
+ td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
+ td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
getrusage(RUSAGE_THREAD, &rusage);
- td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
- td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
- td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
- td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
+ td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
+ td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+ td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
+ td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
free_data(thread_data, g->p.bytes_thread);
@@ -1469,7 +1470,7 @@
}
/* Wait for all the threads to start up: */
while (g->nr_tasks_started != g->p.nr_tasks)
- usleep(1000);
+ usleep(USEC_PER_MSEC);
BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
@@ -1488,9 +1489,9 @@
timersub(&stop, &start, &diff);
- startup_sec = diff.tv_sec * 1000000000.0;
- startup_sec += diff.tv_usec * 1000.0;
- startup_sec /= 1e9;
+ startup_sec = diff.tv_sec * NSEC_PER_SEC;
+ startup_sec += diff.tv_usec * NSEC_PER_USEC;
+ startup_sec /= NSEC_PER_SEC;
tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
tprintf(" #\n");
@@ -1529,14 +1530,14 @@
tprintf("\n ###\n");
tprintf("\n");
- runtime_sec_max = diff.tv_sec * 1000000000.0;
- runtime_sec_max += diff.tv_usec * 1000.0;
- runtime_sec_max /= 1e9;
+ runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
+ runtime_sec_max /= NSEC_PER_SEC;
- runtime_sec_min = runtime_ns_min/1e9;
+ runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
bytes = g->bytes_done;
- runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+ runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
if (g->p.measure_convergence) {
print_res(name, runtime_sec_max,
@@ -1562,7 +1563,7 @@
print_res(name, bytes / 1e9,
"GB,", "data-total", "GB data processed, total");
- print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+ print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
@@ -1581,9 +1582,9 @@
snprintf(tname, 32, "process%d:thread%d", p, t);
print_res(tname, td->speed_gbs,
"GB/sec", "thread-speed", "GB/sec/thread speed");
- print_res(tname, td->system_time_ns / 1e9,
+ print_res(tname, td->system_time_ns / NSEC_PER_SEC,
"secs", "thread-system-time", "system CPU time/thread");
- print_res(tname, td->user_time_ns / 1e9,
+ print_res(tname, td->user_time_ns / NSEC_PER_SEC,
"secs", "thread-user-time", "user CPU time/thread");
}
}
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index bfaf950..6a111e7 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -29,6 +29,7 @@
#include <poll.h>
#include <limits.h>
#include <err.h>
+#include <linux/time64.h>
#define DATASIZE 100
@@ -312,11 +313,11 @@
thread_mode ? "threads" : "processes");
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
diff.tv_sec,
- (unsigned long) (diff.tv_usec/1000));
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n", diff.tv_sec,
- (unsigned long) (diff.tv_usec/1000));
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
default:
/* reaching here is something disaster */
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 1dc2d13..2243f01 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -25,6 +25,7 @@
#include <sys/time.h>
#include <sys/types.h>
#include <sys/syscall.h>
+#include <linux/time64.h>
#include <pthread.h>
@@ -153,24 +154,24 @@
printf("# Executed %d pipe operations between two %s\n\n",
loops, threaded ? "threads" : "processes");
- result_usec = diff.tv_sec * 1000000;
+ result_usec = diff.tv_sec * USEC_PER_SEC;
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
diff.tv_sec,
- (unsigned long) (diff.tv_usec/1000));
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
printf(" %14lf usecs/op\n",
(double)result_usec / (double)loops);
printf(" %14d ops/sec\n",
(int)((double)loops /
- ((double)result_usec / (double)1000000)));
+ ((double)result_usec / (double)USEC_PER_SEC)));
break;
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
diff.tv_sec,
- (unsigned long) (diff.tv_usec / 1000));
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
default:
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 9c1034d..ebb6283 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
#include "util/tool.h"
#include "util/data.h"
#include "arch/common.h"
+#include "util/block-range.h"
#include <dlfcn.h>
#include <linux/bitmap.h>
@@ -46,6 +47,103 @@
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
+/*
+ * Given one basic block:
+ *
+ * from to branch_i
+ * * ----> *
+ * |
+ * | block
+ * v
+ * * ----> *
+ * from to branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+ struct addr_map_symbol *end,
+ struct branch_flags *flags)
+{
+ struct symbol *sym = start->sym;
+ struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+ struct block_range_iter iter;
+ struct block_range *entry;
+
+ /*
+ * Sanity; NULL isn't executable and the CPU cannot execute backwards
+ */
+ if (!start->addr || start->addr > end->addr)
+ return;
+
+ iter = block_range__create(start->addr, end->addr);
+ if (!block_range_iter__valid(&iter))
+ return;
+
+ /*
+ * First block in range is a branch target.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_target);
+ entry->entry++;
+
+ do {
+ entry = block_range_iter(&iter);
+
+ entry->coverage++;
+ entry->sym = sym;
+
+ if (notes)
+ notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+ } while (block_range_iter__next(&iter));
+
+ /*
+ * Last block in rage is a branch.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_branch);
+ entry->taken++;
+ if (flags->predicted)
+ entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct addr_map_symbol *prev = NULL;
+ struct branch_info *bi;
+ int i;
+
+ if (!bs || !bs->nr)
+ return;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return;
+
+ for (i = bs->nr - 1; i >= 0; i--) {
+ /*
+ * XXX filter against symbol
+ */
+ if (prev)
+ process_basic_block(prev, &bi[i].from, &bi[i].flags);
+ prev = &bi[i].to;
+ }
+
+ free(bi);
+}
+
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
@@ -72,6 +170,12 @@
return 0;
}
+ /*
+ * XXX filtered samples can still have branch entires pointing into our
+ * symbol and are missed.
+ */
+ process_branch_stack(sample->branch_stack, al, sample);
+
sample->period = 1;
sample->weight = 1;
@@ -204,8 +308,6 @@
struct perf_evsel *pos;
u64 total_nr_samples;
- machines__set_symbol_filter(&session->machines, symbol__annotate_init);
-
if (ann->cpu_list) {
ret = perf_session__cpu_bitmap(session, ann->cpu_list,
ann->cpu_bitmap);
@@ -367,7 +469,10 @@
if (annotate.session == NULL)
return -1;
- symbol_conf.priv_size = sizeof(struct annotation);
+ ret = symbol__annotation_init();
+ if (ret < 0)
+ goto out_delete;
+
symbol_conf.try_vmlinux_path = true;
ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 21ee753..9ff0db4 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1033,7 +1033,9 @@
}
static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists __maybe_unused)
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..b9bc7e3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -429,7 +429,7 @@
if (al.map != NULL) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
- if (map__load(al.map, NULL) >= 0) {
+ if (map__load(al.map) >= 0) {
dso__inject_build_id(al.map->dso, tool, machine);
/*
* If this fails, too bad, let the other side
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fdde1bd..d426dcb 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -330,7 +330,7 @@
}
kernel_map = machine__kernel_map(machine);
- if (map__load(kernel_map, NULL) < 0) {
+ if (map__load(kernel_map) < 0) {
pr_err("cannot load kernel map\n");
return -ENOENT;
}
@@ -979,7 +979,7 @@
if (is_caller) {
addr = data->call_site;
if (!raw_ip)
- sym = machine__find_kernel_function(machine, addr, &map, NULL);
+ sym = machine__find_kernel_function(machine, addr, &map);
} else
addr = data->ptr;
@@ -1043,8 +1043,7 @@
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite,
- &map, NULL);
+ sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
caller = sym->name;
else
@@ -1086,8 +1085,7 @@
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite,
- &map, NULL);
+ sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
caller = sym->name;
else
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 5e2127e..08fa88f 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -24,6 +24,7 @@
#include <sys/timerfd.h>
#endif
+#include <linux/time64.h>
#include <termios.h>
#include <semaphore.h>
#include <pthread.h>
@@ -362,7 +363,7 @@
if (!skip_event(decode)) {
pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
sample->time, sample->pid, vcpu_record->vcpu_id,
- decode, time_diff/1000);
+ decode, time_diff / NSEC_PER_USEC);
}
}
@@ -608,15 +609,15 @@
pr_info("%10llu ", (unsigned long long)ecount);
pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
- pr_info("%9.2fus ", (double)min / 1e3);
- pr_info("%9.2fus ", (double)max / 1e3);
- pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
+ pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
+ pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
+ pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
kvm_event_rel_stddev(vcpu, event));
pr_info("\n");
}
pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
- kvm->total_count, kvm->total_time / 1e3);
+ kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
if (kvm->lost_events)
pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index ee5b421..f87996b 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -326,6 +326,11 @@
if (ret < 0)
goto out_cleanup;
+ if (params.command == 'D') { /* it shows definition */
+ ret = show_probe_trace_events(pevs, npevs);
+ goto out_cleanup;
+ }
+
ret = apply_perf_probe_events(pevs, npevs);
if (ret < 0)
goto out_cleanup;
@@ -454,6 +459,14 @@
return ret;
}
+#ifdef HAVE_DWARF_SUPPORT
+#define PROBEDEF_STR \
+ "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]"
+#else
+#define PROBEDEF_STR "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]"
+#endif
+
+
static int
__cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
{
@@ -479,13 +492,7 @@
opt_set_filter_with_command, DEFAULT_LIST_FILTER),
OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
opt_set_filter_with_command),
- OPT_CALLBACK('a', "add", NULL,
-#ifdef HAVE_DWARF_SUPPORT
- "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
- " [[NAME=]ARG ...]",
-#else
- "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]",
-#endif
+ OPT_CALLBACK('a', "add", NULL, PROBEDEF_STR,
"probe point definition, where\n"
"\t\tGROUP:\tGroup name (optional)\n"
"\t\tEVENT:\tEvent name\n"
@@ -503,6 +510,9 @@
"\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
#endif
opt_add_probe_event),
+ OPT_CALLBACK('D', "definition", NULL, PROBEDEF_STR,
+ "Show trace event definition of given traceevent for k/uprobe_events.",
+ opt_add_probe_event),
OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
OPT_CALLBACK('L', "line", NULL,
@@ -548,6 +558,7 @@
set_option_flag(options, 'a', "add", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE);
#ifdef HAVE_DWARF_SUPPORT
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
@@ -600,6 +611,14 @@
*/
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+ /*
+ * Except for --list, --del and --add, other command doesn't depend
+ * nor change running kernel. So if user gives offline vmlinux,
+ * ignore its buildid.
+ */
+ if (!strchr("lda", params.command) && symbol_conf.vmlinux_name)
+ symbol_conf.ignore_vmlinux_buildid = true;
+
switch (params.command) {
case 'l':
if (params.uprobes) {
@@ -643,7 +662,9 @@
return ret;
}
break;
+ case 'D':
case 'a':
+
/* Ensure the last given target is used */
if (params.target && !params.target_used) {
pr_err(" Error: -x/-m must follow the probe definitions.\n");
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6355902..67d2a90 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -42,7 +43,7 @@
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
-
+#include <linux/time64.h>
struct record {
struct perf_tool tool;
@@ -96,7 +97,7 @@
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finshed reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading backward ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -106,7 +107,7 @@
pheader = (struct perf_event_header *)(buf + (evt_head & mask));
if (pheader->size == 0) {
- pr_debug("Finshed reading backward ring buffer: get start\n");
+ pr_debug("Finished reading backward ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -383,6 +384,7 @@
struct perf_evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
+ struct perf_evsel_config_term *err_term;
int rc = 0;
perf_evlist__config(evlist, opts, &callchain_param);
@@ -412,6 +414,14 @@
goto out;
}
+ if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ rc = -1;
+ goto out;
+ }
+
rc = record__mmap(rec);
if (rc)
goto out;
@@ -954,7 +964,7 @@
}
if (opts->initial_delay) {
- usleep(opts->initial_delay * 1000);
+ usleep(opts->initial_delay * USEC_PER_MSEC);
perf_evlist__enable(rec->evlist);
}
@@ -1563,29 +1573,39 @@
if (!rec->itr) {
rec->itr = auxtrace_record__init(rec->evlist, &err);
if (err)
- return err;
+ goto out;
}
err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
rec->opts.auxtrace_snapshot_opts);
if (err)
- return err;
+ goto out;
+
+ /*
+ * Allow aliases to facilitate the lookup of symbols for address
+ * filters. Refer to auxtrace_parse_filters().
+ */
+ symbol_conf.allow_aliases = true;
+
+ symbol__init(NULL);
+
+ err = auxtrace_parse_filters(rec->evlist);
+ if (err)
+ goto out;
if (dry_run)
- return 0;
+ goto out;
err = bpf__setup_stdout(rec->evlist);
if (err) {
bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
pr_err("ERROR: Setup BPF stdout failed: %s\n",
errbuf);
- return err;
+ goto out;
}
err = -ENOMEM;
- symbol__init(NULL);
-
if (symbol_conf.kptr_restrict)
pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
@@ -1633,7 +1653,7 @@
if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
- goto out_symbol_exit;
+ goto out;
}
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
@@ -1653,7 +1673,7 @@
ui__error("%s", errbuf);
err = -saved_errno;
- goto out_symbol_exit;
+ goto out;
}
err = -ENOMEM;
@@ -1662,7 +1682,7 @@
err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
if (err)
- goto out_symbol_exit;
+ goto out;
/*
* We take all buildids when the file contains
@@ -1674,11 +1694,11 @@
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
- goto out_symbol_exit;
+ goto out;
}
err = __cmd_record(&record, argc, argv);
-out_symbol_exit:
+out:
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 949e5a1..6e88460 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,6 +89,10 @@
rep->queue_size = perf_config_u64(var, value);
return 0;
}
+ if (!strcmp(var, "report.sort_order")) {
+ default_sort_order = strdup(value);
+ return 0;
+ }
return 0;
}
@@ -931,7 +935,6 @@
if (symbol_conf.report_hierarchy) {
/* disable incompatible options */
- symbol_conf.event_group = false;
symbol_conf.cumulate_callchain = false;
if (field_order) {
@@ -980,9 +983,9 @@
* implementation.
*/
if (ui__has_annotation()) {
- symbol_conf.priv_size = sizeof(struct annotation);
- machines__set_symbol_filter(&session->machines,
- symbol__annotate_init);
+ ret = symbol__annotation_init();
+ if (ret < 0)
+ goto error;
/*
* For searching by name on the "Browse map details".
* providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 0dfe8df..f5503ca 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -26,6 +26,7 @@
#include <pthread.h>
#include <math.h>
#include <api/fs/fs.h>
+#include <linux/time64.h>
#define PR_SET_NAME 15 /* Set process name */
#define MAX_CPUS 4096
@@ -199,7 +200,7 @@
clock_gettime(CLOCK_MONOTONIC, &ts);
- return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
@@ -223,7 +224,7 @@
static void calibrate_run_measurement_overhead(struct perf_sched *sched)
{
- u64 T0, T1, delta, min_delta = 1000000000ULL;
+ u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
int i;
for (i = 0; i < 10; i++) {
@@ -240,7 +241,7 @@
static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
{
- u64 T0, T1, delta, min_delta = 1000000000ULL;
+ u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
int i;
for (i = 0; i < 10; i++) {
@@ -452,8 +453,8 @@
err = getrusage(RUSAGE_SELF, &ru);
BUG_ON(err);
- sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
- sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
+ sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
+ sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;
return sum;
}
@@ -667,12 +668,12 @@
sched->run_avg = delta;
sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
- printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0);
+ printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);
- printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6);
+ printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);
printf("cpu: %0.2f / %0.2f",
- (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6);
+ (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);
#if 0
/*
@@ -680,8 +681,8 @@
* accurate than the sched->sum_exec_runtime based statistics:
*/
printf(" [%0.2f / %0.2f]",
- (double)sched->parent_cpu_usage/1e6,
- (double)sched->runavg_parent_cpu_usage/1e6);
+ (double)sched->parent_cpu_usage / NSEC_PER_MSEC,
+ (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
#endif
printf("\n");
@@ -696,13 +697,13 @@
u64 T0, T1;
T0 = get_nsecs();
- burn_nsecs(sched, 1e6);
+ burn_nsecs(sched, NSEC_PER_MSEC);
T1 = get_nsecs();
printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
T0 = get_nsecs();
- sleep_nsecs(1e6);
+ sleep_nsecs(NSEC_PER_MSEC);
T1 = get_nsecs();
printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
@@ -1213,10 +1214,10 @@
avg = work_list->total_lat / work_list->nb_atoms;
printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
- (double)work_list->total_runtime / 1e6,
- work_list->nb_atoms, (double)avg / 1e6,
- (double)work_list->max_lat / 1e6,
- (double)work_list->max_lat_at / 1e9);
+ (double)work_list->total_runtime / NSEC_PER_MSEC,
+ work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
+ (double)work_list->max_lat / NSEC_PER_MSEC,
+ (double)work_list->max_lat_at / NSEC_PER_SEC);
}
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1491,7 +1492,7 @@
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
goto out;
- color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9);
+ color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC);
if (new_shortname) {
const char *pid_color = color;
@@ -1753,7 +1754,7 @@
printf(" -----------------------------------------------------------------------------------------------------------------\n");
printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
- (double)sched->all_runtime / 1e6, sched->all_count);
+ (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);
printf(" ---------------------------------------------------\n");
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c859e59..7228d14 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -24,6 +24,7 @@
#include "util/thread-stack.h"
#include <linux/bitmap.h>
#include <linux/stringify.h>
+#include <linux/time64.h>
#include "asm/bug.h"
#include "util/mem-events.h"
@@ -464,9 +465,9 @@
if (PRINT_FIELD(TIME)) {
nsecs = sample->time;
- secs = nsecs / NSECS_PER_SEC;
- nsecs -= secs * NSECS_PER_SEC;
- usecs = nsecs / NSECS_PER_USEC;
+ secs = nsecs / NSEC_PER_SEC;
+ nsecs -= secs * NSEC_PER_SEC;
+ usecs = nsecs / NSEC_PER_USEC;
if (nanosecs)
printf("%5lu.%09llu: ", secs, nsecs);
else
@@ -521,11 +522,11 @@
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
if (alf.map)
- alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+ alf.sym = map__find_symbol(alf.map, alf.addr);
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
if (alt.map)
- alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+ alt.sym = map__find_symbol(alt.map, alt.addr);
symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
putchar('/');
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3c7452b..688dea7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
@@ -65,6 +66,7 @@
#include "util/group.h"
#include "asm/bug.h"
+#include <linux/time64.h>
#include <api/fs/fs.h>
#include <stdlib.h>
#include <sys/prctl.h>
@@ -172,7 +174,7 @@
{
r->tv_sec = a->tv_sec - b->tv_sec;
if (a->tv_nsec < b->tv_nsec) {
- r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
+ r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
r->tv_sec--;
} else {
r->tv_nsec = a->tv_nsec - b->tv_nsec ;
@@ -354,7 +356,7 @@
diff_timespec(&rs, &ts, &ref_time);
if (STAT_RECORD) {
- if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
+ if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
pr_err("failed to write stat round event\n");
}
@@ -364,7 +366,7 @@
static void enable_counters(void)
{
if (initial_delay)
- usleep(initial_delay * 1000);
+ usleep(initial_delay * USEC_PER_MSEC);
/*
* We need to enable counters only if:
@@ -539,10 +541,11 @@
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
+ struct perf_evsel_config_term *err_term;
if (interval) {
- ts.tv_sec = interval / 1000;
- ts.tv_nsec = (interval % 1000) * 1000000;
+ ts.tv_sec = interval / USEC_PER_MSEC;
+ ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
@@ -610,6 +613,13 @@
return -1;
}
+ if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(counter), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ return -1;
+ }
+
if (STAT_RECORD) {
int err, fd = perf_data_file__fd(&perf_stat.file);
@@ -971,7 +981,7 @@
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
FILE *output = stat_config.output;
- double msecs = avg / 1e6;
+ double msecs = avg / NSEC_PER_MSEC;
const char *fmt_v, *fmt_n;
char name[25];
@@ -1460,7 +1470,7 @@
if (!null_run)
fprintf(output, "\n");
fprintf(output, " %17.9f seconds time elapsed",
- avg_stats(&walltime_nsecs_stats)/1e9);
+ avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
if (run_count > 1) {
fprintf(output, " ");
print_noise_pct(stddev_stats(&walltime_nsecs_stats),
@@ -2175,8 +2185,8 @@
update_stats(&walltime_nsecs_stats, stat_round->time);
if (stat_config.interval && stat_round->time) {
- tsh.tv_sec = stat_round->time / NSECS_PER_SEC;
- tsh.tv_nsec = stat_round->time % NSECS_PER_SEC;
+ tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
+ tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
ts = &tsh;
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 733a554..e7eaa29 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -24,6 +24,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include <linux/rbtree.h>
+#include <linux/time64.h>
#include "util/symbol.h"
#include "util/callchain.h"
#include "util/strlist.h"
@@ -1288,9 +1289,9 @@
if (c->comm) {
char comm[256];
if (c->total_time > 5000000000) /* 5 seconds */
- sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0);
+ sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / (double)NSEC_PER_SEC);
else
- sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0);
+ sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / (double)NSEC_PER_MSEC);
svg_text(Y, c->start_time, comm);
}
@@ -1637,7 +1638,7 @@
write_svg_file(tchart, output_name);
pr_info("Written %2.1f seconds of trace to %s.\n",
- (tchart->last_time - tchart->first_time) / 1000000000.0, output_name);
+ (tchart->last_time - tchart->first_time) / (double)NSEC_PER_SEC, output_name);
out_delete:
perf_session__delete(session);
return ret;
@@ -1901,10 +1902,10 @@
if (sscanf(arg, "%" PRIu64 "%cs", value, &unit) > 0) {
switch (unit) {
case 'm':
- *value *= 1000000;
+ *value *= NSEC_PER_MSEC;
break;
case 'u':
- *value *= 1000;
+ *value *= NSEC_PER_USEC;
break;
case 'n':
break;
@@ -1928,7 +1929,7 @@
.ordered_events = true,
},
.proc_num = 15,
- .min_time = 1000000,
+ .min_time = NSEC_PER_MSEC,
.merge_dist = 1000,
};
const char *output_name = "output.svg";
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 418ed94..fe3af95 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -24,6 +24,7 @@
#include "util/annotate.h"
#include "util/config.h"
#include "util/color.h"
+#include "util/drv_configs.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/machine.h"
@@ -68,6 +69,7 @@
#include <sys/mman.h>
#include <linux/stringify.h>
+#include <linux/time64.h>
#include <linux/types.h>
static volatile int done;
@@ -624,7 +626,7 @@
display_setup_sig();
pthread__unblock_sigwinch();
repeat:
- delay_msecs = top->delay_secs * 1000;
+ delay_msecs = top->delay_secs * MSEC_PER_SEC;
set_term_quiet_input(&save);
/* trash return*/
getc(stdin);
@@ -656,34 +658,6 @@
return NULL;
}
-static int symbol_filter(struct map *map, struct symbol *sym)
-{
- const char *name = sym->name;
-
- if (!__map__is_kernel(map))
- return 0;
- /*
- * ppc64 uses function descriptors and appends a '.' to the
- * start of every instruction address. Remove it.
- */
- if (name[0] == '.')
- name++;
-
- if (!strcmp(name, "_text") ||
- !strcmp(name, "_etext") ||
- !strcmp(name, "_sinittext") ||
- !strncmp("init_module", name, 11) ||
- !strncmp("cleanup_module", name, 14) ||
- strstr(name, "_text_start") ||
- strstr(name, "_text_end"))
- return 1;
-
- if (symbol__is_idle(sym))
- sym->ignore = true;
-
- return 0;
-}
-
static int hist_iter__top_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
@@ -782,7 +756,7 @@
}
}
- if (al.sym == NULL || !al.sym->ignore) {
+ if (al.sym == NULL || !al.sym->idle) {
struct hists *hists = evsel__hists(evsel);
struct hist_entry_iter iter = {
.evsel = evsel,
@@ -940,6 +914,10 @@
static int __cmd_top(struct perf_top *top)
{
+ char msg[512];
+ struct perf_evsel *pos;
+ struct perf_evsel_config_term *err_term;
+ struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
pthread_t thread;
int ret;
@@ -948,8 +926,6 @@
if (top->session == NULL)
return -1;
- machines__set_symbol_filter(&top->session->machines, symbol_filter);
-
if (!objdump_path) {
ret = perf_env__lookup_objdump(&top->session->header.env);
if (ret)
@@ -976,6 +952,14 @@
if (ret)
goto out_delete;
+ ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
+ if (ret) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ goto out_delete;
+ }
+
top->session->evlist = top->evlist;
perf_session__set_id_hdr_size(top->session);
@@ -1323,7 +1307,9 @@
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
callchain_param.order = ORDER_CALLER;
- symbol_conf.priv_size = sizeof(struct annotation);
+ status = symbol__annotation_init();
+ if (status < 0)
+ goto out_delete_evlist;
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init(NULL) < 0)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b8c6766..c298bd3 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -45,6 +45,7 @@
#include <linux/audit.h>
#include <linux/random.h>
#include <linux/stringify.h>
+#include <linux/time64.h>
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
@@ -741,6 +742,8 @@
.arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
{ .name = "rt_tgsigqueueinfo", .errmsg = true,
.arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+ { .name = "sched_getattr", .errmsg = true, },
+ { .name = "sched_setattr", .errmsg = true, },
{ .name = "sched_setscheduler", .errmsg = true,
.arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
{ .name = "seccomp", .errmsg = true,
@@ -2140,6 +2143,7 @@
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
int err = -1;
+ struct perf_evsel *sys_exit;
char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
trace->ev_qualifier_ids.nr,
trace->ev_qualifier_ids.entries);
@@ -2147,8 +2151,11 @@
if (filter == NULL)
goto out_enomem;
- if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
- err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
+ if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
+ filter)) {
+ sys_exit = trace->syscalls.events.sys_exit;
+ err = perf_evsel__append_tp_filter(sys_exit, filter);
+ }
free(filter);
out:
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 7ed72a47..e4b717e 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -20,7 +20,6 @@
#endif
#ifdef __powerpc__
-#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define CPUINFO_PROC {"cpu"}
#endif
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cb0f135..9a0236a 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -14,13 +14,6 @@
#define HAVE_ATTR_TEST
#include "perf-sys.h"
-#ifndef NSEC_PER_SEC
-# define NSEC_PER_SEC 1000000000ULL
-#endif
-#ifndef NSEC_PER_USEC
-# define NSEC_PER_USEC 1000ULL
-#endif
-
static inline unsigned long long rdclock(void)
{
struct timespec ts;
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index dc51bc5..8a4ce49 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -71,7 +71,7 @@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 615780c..e6d1816 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -97,7 +97,7 @@
evlist = perf_evlist__new();
if (!evlist) {
- pr_debug("No ehough memory to create evlist\n");
+ pr_debug("No enough memory to create evlist\n");
return TEST_FAIL;
}
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index fc54064..2673e86 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -125,7 +125,7 @@
/* Instead of perf_evlist__new_default, don't add default events */
evlist = perf_evlist__new();
if (!evlist) {
- pr_debug("No ehough memory to create evlist\n");
+ pr_debug("No enough memory to create evlist\n");
return TEST_FAIL;
}
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 2af156a8..ff5bc63 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -263,7 +263,7 @@
* Converting addresses for use by objdump requires more information.
* map__load() does that. See map__rip_2objdump() for details.
*/
- if (map__load(al.map, NULL))
+ if (map__load(al.map))
return -1;
/* objdump struggles with kcore - try each map only once */
@@ -511,7 +511,7 @@
/* Load kernel map */
map = machine__kernel_map(machine);
- ret = map__load(map, NULL);
+ ret = map__load(map);
if (ret < 0) {
pr_debug("map__load failed\n");
goto out_err;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 8f6eb85..1046491 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -11,7 +11,7 @@
#include "thread.h"
#include "callchain.h"
-#if defined (__x86_64__) || defined (__i386__)
+#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
#include "arch-tests.h"
#endif
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index e63abab..a508233 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -8,14 +8,6 @@
#include "debug.h"
#include "machine.h"
-static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
- struct symbol *sym)
-{
- bool *visited = symbol__priv(sym);
- *visited = true;
- return 0;
-}
-
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
@@ -28,6 +20,7 @@
enum map_type type = MAP__FUNCTION;
struct maps *maps = &vmlinux.kmaps.maps[type];
u64 mem_start, mem_end;
+ bool header_printed;
/*
* Step 1:
@@ -61,7 +54,7 @@
* be compacted against the list of modules found in the "vmlinux"
* code and with the one got from /proc/modules from the "kallsyms" code.
*/
- if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
+ if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
@@ -99,8 +92,7 @@
* maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
* to fixup the symbols.
*/
- if (machine__load_vmlinux_path(&vmlinux, type,
- vmlinux_matches_kallsyms_filter) <= 0) {
+ if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
err = TEST_SKIP;
goto out;
@@ -126,7 +118,7 @@
mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
first_pair = machine__find_kernel_symbol(&kallsyms, type,
- mem_start, NULL, NULL);
+ mem_start, NULL);
pair = first_pair;
if (pair && UM(pair->start) == mem_start) {
@@ -143,7 +135,7 @@
*/
s64 skew = mem_end - UM(pair->end);
if (llabs(skew) >= page_size)
- pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+ pr_debug("WARN: %#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
mem_start, sym->name, mem_end,
UM(pair->end));
@@ -154,22 +146,23 @@
* kallsyms.
*/
continue;
-
} else {
- pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+ pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
if (pair) {
if (UM(pair->start) == mem_start)
goto next_pair;
- pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+ pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
mem_start, sym->name, pair->name);
} else {
- pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+ pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
mem_start, sym->name, first_pair->name);
}
+
+ continue;
}
} else
- pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
+ pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n",
mem_start, sym->name);
err = -1;
@@ -178,7 +171,7 @@
if (!verbose)
goto out;
- pr_info("Maps only in vmlinux:\n");
+ header_printed = false;
for (map = maps__first(maps); map; map = map__next(map)) {
struct map *
@@ -192,13 +185,18 @@
(map->dso->kernel ?
map->dso->short_name :
map->dso->name));
- if (pair)
+ if (pair) {
pair->priv = 1;
- else
+ } else {
+ if (!header_printed) {
+ pr_info("WARN: Maps only in vmlinux:\n");
+ header_printed = true;
+ }
map__fprintf(map, stderr);
+ }
}
- pr_info("Maps in vmlinux with a different name in kallsyms:\n");
+ header_printed = false;
for (map = maps__first(maps); map; map = map__next(map)) {
struct map *pair;
@@ -211,24 +209,33 @@
continue;
if (pair->start == mem_start) {
- pair->priv = 1;
- pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+ if (!header_printed) {
+ pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
+ header_printed = true;
+ }
+
+ pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
map->start, map->end, map->pgoff, map->dso->name);
if (mem_end != pair->end)
- pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
+ pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
pair->start, pair->end, pair->pgoff);
pr_info(" %s\n", pair->dso->name);
pair->priv = 1;
}
}
- pr_info("Maps only in kallsyms:\n");
+ header_printed = false;
maps = &kallsyms.kmaps.maps[type];
for (map = maps__first(maps); map; map = map__next(map)) {
- if (!map->priv)
+ if (!map->priv) {
+ if (!header_printed) {
+ pr_info("WARN: Maps only in kallsyms:\n");
+ header_printed = true;
+ }
map__fprintf(map, stderr);
+ }
}
out:
machine__exit(&kallsyms);
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index d0a3a8e..fd710ab 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,8 +1,4 @@
-#include <sys/mman.h>
-
-#ifndef PROT_SEM
-#define PROT_SEM 0x8
-#endif
+#include <uapi/linux/mman.h>
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
struct syscall_arg *arg)
@@ -33,31 +29,6 @@
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
-#ifndef MAP_FIXED
-#define MAP_FIXED 0x10
-#endif
-
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS 0x20
-#endif
-
-#ifndef MAP_32BIT
-#define MAP_32BIT 0x40
-#endif
-
-#ifndef MAP_STACK
-#define MAP_STACK 0x20000
-#endif
-
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000
-#endif
-
-#ifndef MAP_UNINITIALIZED
-#define MAP_UNINITIALIZED 0x4000000
-#endif
-
-
static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -95,13 +66,6 @@
#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-#ifndef MREMAP_MAYMOVE
-#define MREMAP_MAYMOVE 1
-#endif
-#ifndef MREMAP_FIXED
-#define MREMAP_FIXED 2
-#endif
-
static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -125,39 +89,6 @@
#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
-#ifndef MADV_HWPOISON
-#define MADV_HWPOISON 100
-#endif
-
-#ifndef MADV_SOFT_OFFLINE
-#define MADV_SOFT_OFFLINE 101
-#endif
-
-#ifndef MADV_MERGEABLE
-#define MADV_MERGEABLE 12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-#define MADV_UNMERGEABLE 13
-#endif
-
-#ifndef MADV_HUGEPAGE
-#define MADV_HUGEPAGE 14
-#endif
-
-#ifndef MADV_NOHUGEPAGE
-#define MADV_NOHUGEPAGE 15
-#endif
-
-#ifndef MADV_DONTDUMP
-#define MADV_DONTDUMP 16
-#endif
-
-#ifndef MADV_DODUMP
-#define MADV_DODUMP 17
-#endif
-
-
static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -170,6 +101,7 @@
P_MADV_BHV(SEQUENTIAL);
P_MADV_BHV(WILLNEED);
P_MADV_BHV(DONTNEED);
+ P_MADV_BHV(FREE);
P_MADV_BHV(REMOVE);
P_MADV_BHV(DONTFORK);
P_MADV_BHV(DOFORK);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 2e2d100..4c18271 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -495,7 +495,7 @@
if (!ins__is_call(dl->ins))
return false;
- if (map_groups__find_ams(&target, NULL) ||
+ if (map_groups__find_ams(&target) ||
map__rip_2objdump(target.map, target.map->map_ip(target.map,
target.addr)) !=
dl->ops.target.addr) {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 13d4143..fb8e42c 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -69,8 +69,11 @@
static void hist_browser__update_rows(struct hist_browser *hb)
{
struct ui_browser *browser = &hb->b;
- u16 header_offset = hb->show_headers ? 1 : 0, index_row;
+ struct hists *hists = hb->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+ u16 header_offset, index_row;
+ header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0;
browser->rows = browser->height - header_offset;
/*
* Verify if we were at the last line and that line isn't
@@ -99,8 +102,11 @@
static void hist_browser__gotorc(struct hist_browser *browser, int row, int column)
{
- u16 header_offset = browser->show_headers ? 1 : 0;
+ struct hists *hists = browser->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+ u16 header_offset;
+ header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0;
ui_browser__gotorc(&browser->b, row + header_offset, column);
}
@@ -1074,7 +1080,7 @@
bool current_entry;
};
-static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
{
struct hpp_arg *arg = hpp->ptr;
int ret, len;
@@ -1091,7 +1097,6 @@
ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent);
ui_browser__printf(arg->b, "%s", hpp->buf);
- advance_hpp(hpp, ret);
return ret;
}
@@ -1496,7 +1501,9 @@
return hpp->size <= 0;
}
-static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size)
+static int
+hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
+ size_t size, int line)
{
struct hists *hists = browser->hists;
struct perf_hpp dummy_hpp = {
@@ -1506,6 +1513,7 @@
struct perf_hpp_fmt *fmt;
size_t ret = 0;
int column = 0;
+ int span = 0;
if (symbol_conf.use_callchain) {
ret = scnprintf(buf, size, " ");
@@ -1517,10 +1525,13 @@
if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll)
continue;
- ret = fmt->header(fmt, &dummy_hpp, hists);
+ ret = fmt->header(fmt, &dummy_hpp, hists, line, &span);
if (advance_hpp_check(&dummy_hpp, ret))
break;
+ if (span)
+ continue;
+
ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " ");
if (advance_hpp_check(&dummy_hpp, ret))
break;
@@ -1554,7 +1565,7 @@
if (column++ < browser->b.horiz_scroll)
continue;
- ret = fmt->header(fmt, &dummy_hpp, hists);
+ ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
if (advance_hpp_check(&dummy_hpp, ret))
break;
@@ -1591,7 +1602,7 @@
}
first_col = false;
- ret = fmt->header(fmt, &dummy_hpp, hists);
+ ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
dummy_hpp.buf[ret] = '\0';
start = trim(dummy_hpp.buf);
@@ -1622,14 +1633,21 @@
static void hists_browser__headers(struct hist_browser *browser)
{
- char headers[1024];
+ struct hists *hists = browser->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
- hists_browser__scnprintf_headers(browser, headers,
- sizeof(headers));
+ int line;
- ui_browser__gotorc(&browser->b, 0, 0);
- ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
- ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+ for (line = 0; line < hpp_list->nr_header_lines; line++) {
+ char headers[1024];
+
+ hists_browser__scnprintf_headers(browser, headers,
+ sizeof(headers), line);
+
+ ui_browser__gotorc(&browser->b, line, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+ }
}
static void hist_browser__show_headers(struct hist_browser *browser)
@@ -1656,10 +1674,13 @@
u16 header_offset = 0;
struct rb_node *nd;
struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+ struct hists *hists = hb->hists;
if (hb->show_headers) {
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+
hist_browser__show_headers(hb);
- header_offset = 1;
+ header_offset = hpp_list->nr_header_lines;
}
ui_browser__hists_init_top(browser);
@@ -2054,10 +2075,10 @@
browser->b.use_navkeypressed = true;
browser->show_headers = symbol_conf.show_hist_headers;
- hists__for_each_format(hists, fmt) {
- perf_hpp__reset_width(fmt, hists);
+ hists__for_each_format(hists, fmt)
++browser->b.columns;
- }
+
+ hists__reset_column_width(hists);
}
struct hist_browser *hist_browser__new(struct hists *hists)
@@ -2418,8 +2439,6 @@
browser->hists->dso_filter = NULL;
ui_helpline__pop();
} else {
- if (map == NULL)
- return 0;
ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"",
__map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
browser->hists->dso_filter = map->dso;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 8091277..98a3466 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -52,9 +52,9 @@
if (target[0] == '0' && tolower(target[1]) == 'x') {
u64 addr = strtoull(target, NULL, 16);
- sym = map__find_symbol(browser->map, addr, NULL);
+ sym = map__find_symbol(browser->map, addr);
} else
- sym = map__find_symbol_by_name(browser->map, target, NULL);
+ sym = map__find_symbol_by_name(browser->map, target);
if (sym != NULL) {
u32 *idx = symbol__browser_index(sym);
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index c5f3677..a4f02de 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -549,7 +549,7 @@
strcat(buf, "+");
first_col = false;
- fmt->header(fmt, &hpp, hists);
+ fmt->header(fmt, &hpp, hists, 0, NULL);
strcat(buf, ltrim(rtrim(hpp.buf)));
}
}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4274969..3738839 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -230,13 +230,14 @@
}
static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists)
+ struct hists *hists, int line __maybe_unused,
+ int *span __maybe_unused)
{
int len = hpp__width_fn(fmt, hpp, hists);
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}
-static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
{
va_list args;
ssize_t ssize = hpp->size;
@@ -441,6 +442,7 @@
struct perf_hpp_list perf_hpp_list = {
.fields = LIST_HEAD_INIT(perf_hpp_list.fields),
.sorts = LIST_HEAD_INIT(perf_hpp_list.sorts),
+ .nr_header_lines = 1,
};
#undef HPP__COLOR_PRINT_FNS
@@ -697,6 +699,21 @@
}
}
+void hists__reset_column_width(struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *node;
+
+ hists__for_each_format(hists, fmt)
+ perf_hpp__reset_width(fmt, hists);
+
+ /* hierarchy entries have their own hpp list */
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format(&node->hpp, fmt)
+ perf_hpp__reset_width(fmt, hists);
+ }
+}
+
void perf_hpp__set_user_width(const char *width_list_str)
{
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index f04a631..89d8441 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,7 +373,8 @@
return 0;
}
-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
@@ -384,7 +385,7 @@
if (symbol_conf.exclude_other && !he->parent)
return 0;
- hists__for_each_format(he->hists, fmt) {
+ perf_hpp_list__for_each_format(hpp_list, fmt) {
if (perf_hpp__should_skip(fmt, he->hists))
continue;
@@ -410,6 +411,11 @@
return hpp->buf - start;
}
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+ return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
struct perf_hpp *hpp,
struct hists *hists,
@@ -528,8 +534,8 @@
return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
}
-static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
- const char *sep, FILE *fp)
+static int hists__fprintf_hierarchy_headers(struct hists *hists,
+ struct perf_hpp *hpp, FILE *fp)
{
bool first_node, first_col;
int indent;
@@ -538,6 +544,7 @@
unsigned header_width = 0;
struct perf_hpp_fmt *fmt;
struct perf_hpp_list_node *fmt_node;
+ const char *sep = symbol_conf.field_sep;
indent = hists->nr_hpp_node;
@@ -549,7 +556,7 @@
struct perf_hpp_list_node, list);
perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
- fmt->header(fmt, hpp, hists);
+ fmt->header(fmt, hpp, hists, 0, NULL);
fprintf(fp, "%s%s", hpp->buf, sep ?: " ");
}
@@ -569,7 +576,7 @@
header_width += fprintf(fp, "+");
first_col = false;
- fmt->header(fmt, hpp, hists);
+ fmt->header(fmt, hpp, hists, 0, NULL);
header_width += fprintf(fp, "%s", trim(hpp->buf));
}
@@ -623,20 +630,28 @@
return 2;
}
-static int
-hists__fprintf_hierarchy_headers(struct hists *hists,
- struct perf_hpp *hpp,
- FILE *fp)
+static void fprintf_line(struct hists *hists, struct perf_hpp *hpp,
+ int line, FILE *fp)
{
- struct perf_hpp_list_node *fmt_node;
struct perf_hpp_fmt *fmt;
+ const char *sep = symbol_conf.field_sep;
+ bool first = true;
+ int span = 0;
- list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
- perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
- perf_hpp__reset_width(fmt, hists);
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first && !span)
+ fprintf(fp, "%s", sep ?: " ");
+ else
+ first = false;
+
+ fmt->header(fmt, hpp, hists, line, &span);
+
+ if (!span)
+ fprintf(fp, "%s", hpp->buf);
}
-
- return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp);
}
static int
@@ -644,28 +659,23 @@
struct perf_hpp *hpp,
FILE *fp)
{
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
struct perf_hpp_fmt *fmt;
unsigned int width;
const char *sep = symbol_conf.field_sep;
bool first = true;
+ int line;
- hists__for_each_format(hists, fmt) {
- if (perf_hpp__should_skip(fmt, hists))
- continue;
-
- if (!first)
- fprintf(fp, "%s", sep ?: " ");
- else
- first = false;
-
- fmt->header(fmt, hpp, hists);
- fprintf(fp, "%s", hpp->buf);
+ for (line = 0; line < hpp_list->nr_header_lines; line++) {
+ /* first # is displayed one level up */
+ if (line)
+ fprintf(fp, "# ");
+ fprintf_line(hists, hpp, line, fp);
+ fprintf(fp, "\n");
}
- fprintf(fp, "\n");
-
if (sep)
- return 1;
+ return hpp_list->nr_header_lines;
first = true;
@@ -689,12 +699,12 @@
fprintf(fp, "\n");
fprintf(fp, "#\n");
- return 3;
+ return hpp_list->nr_header_lines + 2;
}
-static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
{
- char bf[96];
+ char bf[1024];
struct perf_hpp dummy_hpp = {
.buf = bf,
.size = sizeof(bf),
@@ -713,7 +723,6 @@
int max_cols, float min_pcnt, FILE *fp,
bool use_callchain)
{
- struct perf_hpp_fmt *fmt;
struct rb_node *nd;
size_t ret = 0;
const char *sep = symbol_conf.field_sep;
@@ -724,8 +733,7 @@
init_rem_hits();
- hists__for_each_format(hists, fmt)
- perf_hpp__reset_width(fmt, hists);
+ hists__reset_column_width(hists);
if (symbol_conf.col_width_list_str)
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 91c5f6e..eb60e61 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,5 +1,6 @@
libperf-y += alias.o
libperf-y += annotate.o
+libperf-y += block-range.o
libperf-y += build-id.o
libperf-y += config.o
libperf-y += ctype.o
@@ -85,6 +86,7 @@
libperf-y += help-unknown-cmd.o
libperf-y += mem-events.o
libperf-y += vsprintf.o
+libperf-y += drv_configs.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -98,6 +100,7 @@
libperf-$(CONFIG_DWARF) += probe-finder.o
libperf-$(CONFIG_DWARF) += dwarf-aux.o
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4024d30..aeb5a44 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
#include "debug.h"
#include "annotate.h"
#include "evsel.h"
+#include "block-range.h"
#include <regex.h>
#include <pthread.h>
#include <linux/bitops.h>
@@ -53,7 +54,7 @@
return ins__raw_scnprintf(ins, bf, size, ops);
}
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, struct map *map)
{
char *endptr, *tok, *name;
@@ -81,16 +82,16 @@
return ops->target.name == NULL ? -1 : 0;
indirect_call:
- tok = strchr(endptr, '(');
- if (tok != NULL) {
- ops->target.addr = 0;
+ tok = strchr(endptr, '*');
+ if (tok == NULL) {
+ struct symbol *sym = map__find_symbol(map, map->map_ip(map, ops->target.addr));
+ if (sym != NULL)
+ ops->target.name = strdup(sym->name);
+ else
+ ops->target.addr = 0;
return 0;
}
- tok = strchr(endptr, '*');
- if (tok == NULL)
- return -1;
-
ops->target.addr = strtoull(tok + 1, NULL, 16);
return 0;
}
@@ -117,7 +118,7 @@
return ins->ops == &call_ops;
}
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
const char *s = strchr(ops->raw, '+');
@@ -172,7 +173,7 @@
return 0;
}
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, struct map *map)
{
char *name;
@@ -193,7 +194,7 @@
return 0;
if (ops->locked.ins->ops->parse &&
- ops->locked.ins->ops->parse(ops->locked.ops) < 0)
+ ops->locked.ins->ops->parse(ops->locked.ops, map) < 0)
goto out_free_ops;
return 0;
@@ -236,7 +237,7 @@
.scnprintf = lock__scnprintf,
};
-static int mov__parse(struct ins_operands *ops)
+static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *comment, prev;
@@ -303,7 +304,7 @@
.scnprintf = mov__scnprintf,
};
-static int dec__parse(struct ins_operands *ops)
+static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
char *target, *comment, *s, prev;
@@ -491,13 +492,6 @@
return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp);
}
-int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym)
-{
- struct annotation *notes = symbol__annotation(sym);
- pthread_mutex_init(¬es->lock, NULL);
- return 0;
-}
-
int symbol__alloc_hist(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
@@ -715,7 +709,7 @@
return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
}
-static void disasm_line__init_ins(struct disasm_line *dl)
+static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
{
dl->ins = ins__find(dl->name);
@@ -725,7 +719,7 @@
if (!dl->ins->ops)
return;
- if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0)
+ if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0)
dl->ins = NULL;
}
@@ -767,7 +761,8 @@
}
static struct disasm_line *disasm_line__new(s64 offset, char *line,
- size_t privsize, int line_nr)
+ size_t privsize, int line_nr,
+ struct map *map)
{
struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
@@ -782,7 +777,7 @@
if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
goto out_free_line;
- disasm_line__init_ins(dl);
+ disasm_line__init_ins(dl, map);
}
}
@@ -866,6 +861,89 @@
return percent;
}
+static const char *annotate__address_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark red for >75% coverage */
+ if (cov > 0.75)
+ return PERF_COLOR_RED;
+
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+ bool emit_comment = true;
+
+ if (!br)
+ return;
+
+#if 1
+ if (br->is_target && br->start == addr) {
+ struct block_range *branch = br;
+ double p;
+
+ /*
+ * Find matching branch to our target.
+ */
+ while (!branch->is_branch)
+ branch = block_range__next(branch);
+
+ p = 100 *(double)br->entry / branch->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage joined at this target in relation
+ * to the next branch.
+ */
+ printf(" +%.2f%%", p);
+ }
+ }
+#endif
+ if (br->is_branch && br->end == addr) {
+ double p = 100*(double)br->taken / br->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage leaving at this branch, and
+ * its prediction ratio.
+ */
+ printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken);
+ }
+ }
+}
+
+
static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
int max_lines, struct disasm_line *queue)
@@ -885,6 +963,7 @@
s64 offset = dl->offset;
const u64 addr = start + offset;
struct disasm_line *next;
+ struct block_range *br;
next = disasm__get_next_ip_line(¬es->src->source, dl);
@@ -954,8 +1033,12 @@
}
printf(" : ");
- color_fprintf(stdout, PERF_COLOR_MAGENTA, " %" PRIx64 ":", addr);
- color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line);
+
+ br = block_range__find(addr);
+ color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr);
+ color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line);
+ annotate__branch_printf(br, addr);
+ printf("\n");
if (ppercents != &percent)
free(ppercents);
@@ -1066,7 +1149,7 @@
parsed_line = tmp2 + 1;
}
- dl = disasm_line__new(offset, parsed_line, privsize, *line_nr);
+ dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map);
free(line);
(*line_nr)++;
@@ -1084,7 +1167,7 @@
.addr = dl->ops.target.addr,
};
- if (!map_groups__find_ams(&target, NULL) &&
+ if (!map_groups__find_ams(&target) &&
target.sym->start == target.al_addr)
dl->ops.target.name = strdup(target.sym->name);
}
@@ -1162,14 +1245,46 @@
return 0;
}
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+ char linkname[PATH_MAX];
+ char *build_id_filename;
+
+ if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(dso))
+ return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+ build_id_filename = dso__build_id_filename(dso, NULL, 0);
+ if (build_id_filename) {
+ __symbol__join_symfs(filename, filename_size, build_id_filename);
+ free(build_id_filename);
+ } else {
+ if (dso->has_build_id)
+ return ENOMEM;
+ goto fallback;
+ }
+
+ if (dso__is_kcore(dso) ||
+ readlink(filename, linkname, sizeof(linkname)) < 0 ||
+ strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
+fallback:
+ /*
+ * If we don't have build-ids or the build-id file isn't in the
+ * cache, or is just a kallsyms file, well, lets hope that this
+ * DSO is the same as when 'perf record' ran.
+ */
+ __symbol__join_symfs(filename, filename_size, dso->long_name);
+ }
+
+ return 0;
+}
+
int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
{
struct dso *dso = map->dso;
- char *filename = dso__build_id_filename(dso, NULL, 0);
- bool free_filename = true;
char command[PATH_MAX * 2];
FILE *file;
- int err = 0;
char symfs_filename[PATH_MAX];
struct kcore_extract kce;
bool delete_extract = false;
@@ -1177,38 +1292,13 @@
int lineno = 0;
int nline;
pid_t pid;
+ int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
- if (filename)
- symbol__join_symfs(symfs_filename, filename);
-
- if (filename == NULL) {
- if (dso->has_build_id)
- return ENOMEM;
- goto fallback;
- } else if (dso__is_kcore(dso) ||
- readlink(symfs_filename, command, sizeof(command)) < 0 ||
- strstr(command, DSO__NAME_KALLSYMS) ||
- access(symfs_filename, R_OK)) {
- free(filename);
-fallback:
- /*
- * If we don't have build-ids or the build-id file isn't in the
- * cache, or is just a kallsyms file, well, lets hope that this
- * DSO is the same as when 'perf record' ran.
- */
- filename = (char *)dso->long_name;
- symbol__join_symfs(symfs_filename, filename);
- free_filename = false;
- }
-
- if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
- !dso__is_kcore(dso)) {
- err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
- goto out_free_filename;
- }
+ if (err)
+ return err;
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
- filename, sym->name, map->unmap_ip(map, sym->start),
+ symfs_filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end));
pr_debug("annotating [%p] %30s : [%p] %30s\n",
@@ -1223,11 +1313,6 @@
delete_extract = true;
strlcpy(symfs_filename, kce.extract_filename,
sizeof(symfs_filename));
- if (free_filename) {
- free(filename);
- free_filename = false;
- }
- filename = symfs_filename;
}
} else if (dso__needs_decompress(dso)) {
char tmp[PATH_MAX];
@@ -1236,14 +1321,14 @@
bool ret;
if (kmod_path__parse_ext(&m, symfs_filename))
- goto out_free_filename;
+ goto out;
snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
fd = mkstemp(tmp);
if (fd < 0) {
free(m.ext);
- goto out_free_filename;
+ goto out;
}
ret = decompress_to_file(m.ext, symfs_filename, fd);
@@ -1255,7 +1340,7 @@
close(fd);
if (!ret)
- goto out_free_filename;
+ goto out;
strcpy(symfs_filename, tmp);
}
@@ -1271,7 +1356,7 @@
map__rip_2objdump(map, sym->end),
symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
symbol_conf.annotate_src ? "-S" : "",
- symfs_filename, filename);
+ symfs_filename, symfs_filename);
pr_debug("Executing: %s\n", command);
@@ -1333,11 +1418,10 @@
if (dso__needs_decompress(dso))
unlink(symfs_filename);
-out_free_filename:
+
if (delete_extract)
kcore_extract__delete(&kce);
- if (free_filename)
- free(filename);
+out:
return err;
out_close_stdout:
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f67ccb0..5bbcec1 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -36,7 +36,7 @@
struct ins_ops {
void (*free)(struct ins_operands *ops);
- int (*parse)(struct ins_operands *ops);
+ int (*parse)(struct ins_operands *ops, struct map *map);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops);
};
@@ -130,6 +130,7 @@
struct annotation {
pthread_mutex_t lock;
+ u64 max_coverage;
struct annotated_source *src;
};
@@ -177,7 +178,6 @@
int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
int errnum, char *buf, size_t buflen);
-int symbol__annotate_init(struct map *map, struct symbol *sym);
int symbol__annotate_printf(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, bool full_paths,
int min_pcnt, int max_lines, int context);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c916901..c5a6e0b1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -16,6 +16,10 @@
#include <sys/types.h>
#include <sys/mman.h>
#include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -35,9 +39,14 @@
#include "../perf.h"
#include "util.h"
#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
#include "cpumap.h"
#include "thread_map.h"
#include "asm/bug.h"
+#include "symbol/kallsyms.h"
#include "auxtrace.h"
#include <linux/hash.h>
@@ -892,6 +901,7 @@
return intel_pt_process_auxtrace_info(event, session);
case PERF_AUXTRACE_INTEL_BTS:
return intel_bts_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_CS_ETM:
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
@@ -1398,3 +1408,731 @@
return NULL;
}
+
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+ free(filt->str);
+ filt->action = NULL;
+ filt->sym_from = NULL;
+ filt->sym_to = NULL;
+ filt->filename = NULL;
+ filt->str = NULL;
+}
+
+static struct addr_filter *addr_filter__new(void)
+{
+ struct addr_filter *filt = zalloc(sizeof(*filt));
+
+ if (filt)
+ INIT_LIST_HEAD(&filt->list);
+
+ return filt;
+}
+
+static void addr_filter__free(struct addr_filter *filt)
+{
+ if (filt)
+ addr_filter__free_str(filt);
+ free(filt);
+}
+
+static void addr_filters__add(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_add_tail(&filt->list, &filts->head);
+ filts->cnt += 1;
+}
+
+static void addr_filters__del(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_del_init(&filt->list);
+ filts->cnt -= 1;
+}
+
+void addr_filters__init(struct addr_filters *filts)
+{
+ INIT_LIST_HEAD(&filts->head);
+ filts->cnt = 0;
+}
+
+void addr_filters__exit(struct addr_filters *filts)
+{
+ struct addr_filter *filt, *n;
+
+ list_for_each_entry_safe(filt, n, &filts->head, list) {
+ addr_filters__del(filts, filt);
+ addr_filter__free(filt);
+ }
+}
+
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+ const char *str_delim)
+{
+ *inp += strspn(*inp, " ");
+
+ if (isdigit(**inp)) {
+ char *endptr;
+
+ if (!num)
+ return -EINVAL;
+ errno = 0;
+ *num = strtoull(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp)
+ return -EINVAL;
+ *inp = endptr;
+ } else {
+ size_t n;
+
+ if (!str)
+ return -EINVAL;
+ *inp += strspn(*inp, " ");
+ *str = *inp;
+ n = strcspn(*inp, str_delim);
+ if (!n)
+ return -EINVAL;
+ *inp += n;
+ if (**inp) {
+ **inp = '\0';
+ *inp += 1;
+ }
+ }
+ return 0;
+}
+
+static int parse_action(struct addr_filter *filt)
+{
+ if (!strcmp(filt->action, "filter")) {
+ filt->start = true;
+ filt->range = true;
+ } else if (!strcmp(filt->action, "start")) {
+ filt->start = true;
+ } else if (!strcmp(filt->action, "stop")) {
+ filt->start = false;
+ } else if (!strcmp(filt->action, "tracestop")) {
+ filt->start = false;
+ filt->range = true;
+ filt->action += 5; /* Change 'tracestop' to 'stop' */
+ } else {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int parse_sym_idx(char **inp, int *idx)
+{
+ *idx = -1;
+
+ *inp += strspn(*inp, " ");
+
+ if (**inp != '#')
+ return 0;
+
+ *inp += 1;
+
+ if (**inp == 'g' || **inp == 'G') {
+ *inp += 1;
+ *idx = 0;
+ } else {
+ unsigned long num;
+ char *endptr;
+
+ errno = 0;
+ num = strtoul(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp || num > INT_MAX)
+ return -EINVAL;
+ *inp = endptr;
+ *idx = num;
+ }
+
+ return 0;
+}
+
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+ int err = parse_num_or_str(inp, num, str, " ");
+
+ if (!err && *str)
+ err = parse_sym_idx(inp, idx);
+
+ return err;
+}
+
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+ char *fstr;
+ int err;
+
+ filt->str = fstr = strdup(*filter_inp);
+ if (!fstr)
+ return -ENOMEM;
+
+ err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+ if (err)
+ goto out_err;
+
+ err = parse_action(filt);
+ if (err)
+ goto out_err;
+
+ err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+ &filt->sym_from_idx);
+ if (err)
+ goto out_err;
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '/') {
+ fstr += 1;
+ err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+ &filt->sym_to_idx);
+ if (err)
+ goto out_err;
+ filt->range = true;
+ }
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '@') {
+ fstr += 1;
+ err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+ if (err)
+ goto out_err;
+ }
+
+ fstr += strspn(fstr, " ,");
+
+ *filter_inp += fstr - filt->str;
+
+ return 0;
+
+out_err:
+ addr_filter__free_str(filt);
+
+ return err;
+}
+
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter)
+{
+ struct addr_filter *filt;
+ const char *fstr = filter;
+ int err;
+
+ while (*fstr) {
+ filt = addr_filter__new();
+ err = parse_one_filter(filt, &fstr);
+ if (err) {
+ addr_filter__free(filt);
+ addr_filters__exit(filts);
+ return err;
+ }
+ addr_filters__add(filts, filt);
+ }
+
+ return 0;
+}
+
+struct sym_args {
+ const char *name;
+ u64 start;
+ u64 size;
+ int idx;
+ int cnt;
+ bool started;
+ bool global;
+ bool selected;
+ bool duplicate;
+ bool near;
+};
+
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+ /* A function with the same name, and global or the n'th found or any */
+ return symbol_type__is_a(type, MAP__FUNCTION) &&
+ !strcmp(name, args->name) &&
+ ((args->global && isupper(type)) ||
+ (args->selected && ++(args->cnt) == args->idx) ||
+ (!args->global && !args->selected));
+}
+
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (args->started) {
+ if (!args->size)
+ args->size = start - args->start;
+ if (args->selected) {
+ if (args->size)
+ return 1;
+ } else if (kern_sym_match(args, name, type)) {
+ args->duplicate = true;
+ return 1;
+ }
+ } else if (kern_sym_match(args, name, type)) {
+ args->started = true;
+ args->start = start;
+ }
+
+ return 0;
+}
+
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (kern_sym_match(args, name, type)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++args->cnt, start, type, name);
+ args->near = true;
+ } else if (args->near) {
+ args->near = false;
+ pr_err("\t\twhich is near\t\t%s\n", name);
+ }
+
+ return 0;
+}
+
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+ if (idx > 0) {
+ pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+ idx, sym_name);
+ } else if (!idx) {
+ pr_err("Global symbol '%s' not found.\n", sym_name);
+ } else {
+ pr_err("Symbol '%s' not found.\n", sym_name);
+ }
+ pr_err("Note that symbols must be functions.\n");
+
+ return -EINVAL;
+}
+
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+ struct sym_args args = {
+ .name = sym_name,
+ .idx = idx,
+ .global = !idx,
+ .selected = idx > 0,
+ };
+ int err;
+
+ *start = 0;
+ *size = 0;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+ if (err < 0) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err;
+ }
+
+ if (args.duplicate) {
+ pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+ args.cnt = 0;
+ kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+ return -EINVAL;
+ }
+
+ if (!args.started) {
+ pr_err("Kernel symbol lookup: ");
+ return sym_not_found_error(sym_name, idx);
+ }
+
+ *start = args.start;
+ *size = args.size;
+
+ return 0;
+}
+
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+ char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (!symbol_type__is_a(type, MAP__FUNCTION))
+ return 0;
+
+ if (!args->started) {
+ args->started = true;
+ args->start = start;
+ }
+ /* Don't know exactly where the kernel ends, so we add a page */
+ args->size = round_up(start, page_size) + page_size - args->start;
+
+ return 0;
+}
+
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+ struct sym_args args = { .started = false };
+ int err;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+ if (err < 0 || !args.started) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err;
+ }
+
+ filt->addr = args.start;
+ filt->size = args.size;
+
+ return 0;
+}
+
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+ if (start + size >= filt->addr)
+ return 0;
+
+ if (filt->sym_from) {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+ filt->sym_to, start, filt->sym_from, filt->addr);
+ } else {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n",
+ filt->sym_to, start, filt->addr);
+ }
+
+ return -EINVAL;
+}
+
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+ bool no_size = false;
+ u64 start, size;
+ int err;
+
+ if (symbol_conf.kptr_restrict) {
+ pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+ return addr_filter__entire_kernel(filt);
+
+ if (filt->sym_from) {
+ err = find_kern_sym(filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ return err;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to) {
+ filt->size = size;
+ no_size = !!size;
+ }
+ }
+
+ if (filt->sym_to) {
+ err = find_kern_sym(filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ return err;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+ filt->size = start + size - filt->addr;
+ no_size = !!size;
+ }
+
+ /* The very last symbol in kallsyms does not imply a particular size */
+ if (no_size) {
+ pr_err("Cannot determine size of symbol '%s'\n",
+ filt->sym_to ? filt->sym_to : filt->sym_from);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dso *load_dso(const char *name)
+{
+ struct map *map;
+ struct dso *dso;
+
+ map = dso__new_map(name);
+ if (!map)
+ return NULL;
+
+ map__load(map);
+
+ dso = dso__get(map->dso);
+
+ map__put(map);
+
+ return dso;
+}
+
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+ int idx)
+{
+ /* Same name, and global or the n'th found or any */
+ return !arch__compare_symbol_names(name, sym->name) &&
+ ((!idx && sym->binding == STB_GLOBAL) ||
+ (idx > 0 && ++*cnt == idx) ||
+ idx < 0);
+}
+
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+ struct symbol *sym;
+ bool near = false;
+ int cnt = 0;
+
+ pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+ sym = dso__first_symbol(dso, MAP__FUNCTION);
+ while (sym) {
+ if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++cnt, sym->start,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+ near = true;
+ } else if (near) {
+ near = false;
+ pr_err("\t\twhich is near\t\t%s\n", sym->name);
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+ u64 *size, int idx)
+{
+ struct symbol *sym;
+ int cnt = 0;
+
+ *start = 0;
+ *size = 0;
+
+ sym = dso__first_symbol(dso, MAP__FUNCTION);
+ while (sym) {
+ if (*start) {
+ if (!*size)
+ *size = sym->start - *start;
+ if (idx > 0) {
+ if (*size)
+ return 1;
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ print_duplicate_syms(dso, sym_name);
+ return -EINVAL;
+ }
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ *start = sym->start;
+ *size = sym->end - sym->start;
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ if (!*start)
+ return sym_not_found_error(sym_name, idx);
+
+ return 0;
+}
+
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+ struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION);
+ struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION);
+
+ if (!first_sym || !last_sym) {
+ pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+ filt->filename);
+ return -EINVAL;
+ }
+
+ filt->addr = first_sym->start;
+ filt->size = last_sym->end - first_sym->start;
+
+ return 0;
+}
+
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+ u64 start, size;
+ struct dso *dso;
+ int err = 0;
+
+ if (!filt->sym_from && !filt->sym_to)
+ return 0;
+
+ if (!filt->filename)
+ return addr_filter__resolve_kernel_syms(filt);
+
+ dso = load_dso(filt->filename);
+ if (!dso) {
+ pr_err("Failed to load symbols from: %s\n", filt->filename);
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+ err = addr_filter__entire_dso(filt, dso);
+ goto put_dso;
+ }
+
+ if (filt->sym_from) {
+ err = find_dso_sym(dso, filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ goto put_dso;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to)
+ filt->size = size;
+ }
+
+ if (filt->sym_to) {
+ err = find_dso_sym(dso, filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ goto put_dso;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+
+ filt->size = start + size - filt->addr;
+ }
+
+put_dso:
+ dso__put(dso);
+
+ return err;
+}
+
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+ char filename_buf[PATH_MAX];
+ const char *at = "";
+ const char *fn = "";
+ char *filter;
+ int err;
+
+ if (filt->filename) {
+ at = "@";
+ fn = realpath(filt->filename, filename_buf);
+ if (!fn)
+ return NULL;
+ }
+
+ if (filt->range) {
+ err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+ filt->action, filt->addr, filt->size, at, fn);
+ } else {
+ err = asprintf(&filter, "%s 0x%"PRIx64"%s%s",
+ filt->action, filt->addr, at, fn);
+ }
+
+ return err < 0 ? NULL : filter;
+}
+
+static int parse_addr_filter(struct perf_evsel *evsel, const char *filter,
+ int max_nr)
+{
+ struct addr_filters filts;
+ struct addr_filter *filt;
+ int err;
+
+ addr_filters__init(&filts);
+
+ err = addr_filters__parse_bare_filter(&filts, filter);
+ if (err)
+ goto out_exit;
+
+ if (filts.cnt > max_nr) {
+ pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+ filts.cnt, max_nr);
+ err = -EINVAL;
+ goto out_exit;
+ }
+
+ list_for_each_entry(filt, &filts.head, list) {
+ char *new_filter;
+
+ err = addr_filter__resolve_syms(filt);
+ if (err)
+ goto out_exit;
+
+ new_filter = addr_filter__to_str(filt);
+ if (!new_filter) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+
+ if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+ }
+
+out_exit:
+ addr_filters__exit(&filts);
+
+ if (err) {
+ pr_err("Failed to parse address filter: '%s'\n", filter);
+ pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+ pr_err("Where multiple filters are separated by space or comma.\n");
+ }
+
+ return err;
+}
+
+static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
+static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel)
+{
+ struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+ int nr_addr_filters = 0;
+
+ if (!pmu)
+ return 0;
+
+ perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters);
+
+ return nr_addr_filters;
+}
+
+int auxtrace_parse_filters(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ char *filter;
+ int err, max_nr;
+
+ evlist__for_each_entry(evlist, evsel) {
+ filter = evsel->filter;
+ max_nr = perf_evsel__nr_addr_filter(evsel);
+ if (!filter || !max_nr)
+ continue;
+ evsel->filter = NULL;
+ err = parse_addr_filter(evsel, filter, max_nr);
+ free(filter);
+ if (err)
+ return err;
+ pr_debug("Address filter: %s\n", evsel->filter);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index ac5f0d7..26fb1ee 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -41,6 +41,7 @@
PERF_AUXTRACE_UNKNOWN,
PERF_AUXTRACE_INTEL_PT,
PERF_AUXTRACE_INTEL_BTS,
+ PERF_AUXTRACE_CS_ETM,
};
enum itrace_period_type {
@@ -317,6 +318,48 @@
unsigned int alignment;
};
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ * to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ * and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+ struct list_head list;
+ bool range;
+ bool start;
+ const char *action;
+ const char *sym_from;
+ const char *sym_to;
+ int sym_from_idx;
+ int sym_to_idx;
+ u64 addr;
+ u64 size;
+ const char *filename;
+ char *str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+ struct list_head head;
+ int cnt;
+};
+
#ifdef HAVE_AUXTRACE_SUPPORT
/*
@@ -481,6 +524,12 @@
union perf_event *event);
void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter);
+int auxtrace_parse_filters(struct perf_evlist *evlist);
+
static inline int auxtrace__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -639,6 +688,12 @@
{
}
+static inline
+int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
struct auxtrace_mmap_params *mp,
void *userpg, int fd);
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 0000000..7b3e1d7
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,328 @@
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+ struct rb_root root;
+ u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+ /*
+ * XXX still paranoid for now; see if we can make this depend on
+ * DEBUG=1 builds.
+ */
+#if 1
+ struct rb_node *rb;
+ u64 old = 0; /* NULL isn't executable */
+
+ for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+ struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+ assert(old < entry->start);
+ assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+ old = entry->end;
+ }
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *parent = NULL;
+ struct block_range *entry;
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (addr < entry->start)
+ p = &parent->rb_left;
+ else if (addr > entry->end)
+ p = &parent->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_left;
+ while (*p) {
+ node = *p;
+ p = &node->rb_right;
+ }
+ rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_right;
+ while (*p) {
+ node = *p;
+ p = &node->rb_left;
+ }
+ rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end: branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *n, *parent = NULL;
+ struct block_range *next, *entry = NULL;
+ struct block_range_iter iter = { NULL, NULL };
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (start < entry->start)
+ p = &parent->rb_left;
+ else if (start > entry->end)
+ p = &parent->rb_right;
+ else
+ break;
+ }
+
+ /*
+ * Didn't find anything.. there's a hole at @start, however @end might
+ * be inside/behind the next range.
+ */
+ if (!*p) {
+ if (!entry) /* tree empty */
+ goto do_whole;
+
+ /*
+ * If the last node is before, advance one to find the next.
+ */
+ n = parent;
+ if (entry->end < start) {
+ n = rb_next(n);
+ if (!n)
+ goto do_whole;
+ }
+ next = rb_entry(n, struct block_range, node);
+
+ if (next->start <= end) { /* add head: [start...][n->start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = start,
+ .end = next->start - 1,
+ .is_target = 1,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&head->node, &next->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = head;
+ goto do_tail;
+ }
+
+do_whole:
+ /*
+ * The whole [start..end] range is non-overlapping.
+ */
+ entry = malloc(sizeof(struct block_range));
+ if (!entry)
+ return iter;
+
+ *entry = (struct block_range){
+ .start = start,
+ .end = end,
+ .is_target = 1,
+ .is_branch = 1,
+ };
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = entry;
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * We found a range that overlapped with ours, split if needed.
+ */
+ if (entry->start < start) { /* split: [e->start...][start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = entry->start,
+ .end = start - 1,
+ .is_target = entry->is_target,
+ .is_branch = 0,
+
+ .coverage = entry->coverage,
+ .entry = entry->entry,
+ };
+
+ entry->start = start;
+ entry->is_target = 1;
+ entry->entry = 0;
+
+ rb_link_left_of_node(&head->node, &entry->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ } else if (entry->start == start)
+ entry->is_target = 1;
+
+ iter.start = entry;
+
+do_tail:
+ /*
+ * At this point we've got: @iter.start = [@start...] but @end can still be
+ * inside or beyond it.
+ */
+ entry = iter.start;
+ for (;;) {
+ /*
+ * If @end is inside @entry, split.
+ */
+ if (end < entry->end) { /* split: [...end][...e->end] */
+ struct block_range *tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = end + 1,
+ .end = entry->end,
+ .is_target = 0,
+ .is_branch = entry->is_branch,
+
+ .coverage = entry->coverage,
+ .taken = entry->taken,
+ .pred = entry->pred,
+ };
+
+ entry->end = end;
+ entry->is_branch = 1;
+ entry->taken = 0;
+ entry->pred = 0;
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * If @end matches @entry, done
+ */
+ if (end == entry->end) {
+ entry->is_branch = 1;
+ iter.end = entry;
+ goto done;
+ }
+
+ next = block_range__next(entry);
+ if (!next)
+ goto add_tail;
+
+ /*
+ * If @end is in beyond @entry but not inside @next, add tail.
+ */
+ if (end < next->start) { /* add tail: [...e->end][...end] */
+ struct block_range *tail;
+add_tail:
+ tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = entry->end + 1,
+ .end = end,
+ .is_target = 0,
+ .is_branch = 1,
+ };
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = tail;
+ goto done;
+ }
+
+ /*
+ * If there is a hole between @entry and @next, fill it.
+ */
+ if (entry->end + 1 != next->start) {
+ struct block_range *hole = malloc(sizeof(struct block_range));
+ if (!hole)
+ return iter;
+
+ *hole = (struct block_range){
+ .start = entry->end + 1,
+ .end = next->start - 1,
+ .is_target = 0,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&hole->node, &next->node);
+ rb_insert_color(&hole->node, &block_ranges.root);
+ block_range__debug();
+ }
+
+ entry = next;
+ }
+
+done:
+ assert(iter.start->start == start && iter.start->is_target);
+ assert(iter.end->end == end && iter.end->is_branch);
+
+ block_ranges.blocks++;
+
+ return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ * br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data what so ever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+ struct symbol *sym;
+
+ if (!br) {
+ if (block_ranges.blocks)
+ return 0;
+
+ return -1;
+ }
+
+ sym = br->sym;
+ if (!sym)
+ return -1;
+
+ return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 0000000..a8c8413
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,71 @@
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include "symbol.h"
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node: treenode
+ * @start: inclusive start of range
+ * @end: inclusive end of range
+ * @is_target: @start is a jump target
+ * @is_branch: @end is a branch instruction
+ * @coverage: number of blocks that cover this range
+ * @taken: number of times the branch is taken (requires @is_branch)
+ * @pred: number of times the taken branch was predicted
+ */
+struct block_range {
+ struct rb_node node;
+
+ struct symbol *sym;
+
+ u64 start;
+ u64 end;
+
+ int is_target, is_branch;
+
+ u64 coverage;
+ u64 entry;
+ u64 taken;
+ u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+ struct rb_node *n = rb_next(&br->node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+ struct block_range *start;
+ struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+ return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+ if (iter->start == iter->end)
+ return false;
+
+ iter->start = block_range__next(iter->start);
+ return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+ if (!iter->start || !iter->end)
+ return false;
+ return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 1f12e4e..2b2c9b8 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -531,7 +531,7 @@
ptevs = malloc(array_sz);
if (!ptevs) {
- pr_debug("No ehough memory: alloc ptevs failed\n");
+ pr_debug("No enough memory: alloc ptevs failed\n");
return -ENOMEM;
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5651f3c..e528c40 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -620,7 +620,7 @@
ret = probe_cache__scan_sdt(cache, realname);
if (ret >= 0) {
- pr_debug("Found %d SDTs in %s\n", ret, realname);
+ pr_debug4("Found %d SDTs in %s\n", ret, realname);
if (probe_cache__commit(cache) < 0)
ret = -1;
}
@@ -691,7 +691,7 @@
/* Update SDT cache : error is just warned */
if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
- pr_debug("Failed to update/scan SDT cache for %s\n", realname);
+ pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
out_free:
if (!is_kallsyms)
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
new file mode 100644
index 0000000..3cc6bc3
--- /dev/null
+++ b/tools/perf/util/cs-etm.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+/* Versionning header in case things need tro change in the future. That way
+ * decoding of old snapshot is still possible.
+ */
+enum {
+ /* Starting with 0x0 */
+ CS_HEADER_VERSION_0,
+ /* PMU->type (32 bit), total # of CPUs (32 bit) */
+ CS_PMU_TYPE_CPUS,
+ CS_ETM_SNAPSHOT,
+ CS_HEADER_VERSION_0_MAX,
+};
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+ CS_ETM_MAGIC,
+ CS_ETM_CPU,
+};
+
+/* ETMv3/PTM metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETM_ETMCR = CS_ETM_CPU + 1,
+ CS_ETM_ETMTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETM_ETMCCER,
+ CS_ETM_ETMIDR,
+ CS_ETM_PRIV_MAX,
+};
+
+/* ETMv4 metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+ CS_ETMV4_TRCTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETMV4_TRCIDR0,
+ CS_ETMV4_TRCIDR1,
+ CS_ETMV4_TRCIDR2,
+ CS_ETMV4_TRCIDR8,
+ CS_ETMV4_TRCAUTHSTATUS,
+ CS_ETMV4_PRIV_MAX,
+};
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+
+static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL;
+static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL;
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+
+#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4f979bb..7123f4d 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -437,7 +437,7 @@
int ret;
if (nr_elements * sizeof(u32) != raw_size)
- pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+ pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
raw_size, nr_elements * sizeof(u32) - raw_size);
len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 8c4212a..c1838b6 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -6,6 +6,7 @@
#include <stdarg.h>
#include <stdio.h>
#include <api/debug.h>
+#include <linux/time64.h>
#include "cache.h"
#include "color.h"
@@ -14,9 +15,6 @@
#include "util.h"
#include "target.h"
-#define NSECS_PER_SEC 1000000000ULL
-#define NSECS_PER_USEC 1000ULL
-
int verbose;
bool dump_trace = false, quiet = false;
int debug_ordered_events;
@@ -54,9 +52,9 @@
int ret = 0;
u64 secs, usecs, nsecs = t;
- secs = nsecs / NSECS_PER_SEC;
- nsecs -= secs * NSECS_PER_SEC;
- usecs = nsecs / NSECS_PER_USEC;
+ secs = nsecs / NSEC_PER_SEC;
+ nsecs -= secs * NSEC_PER_SEC;
+ usecs = nsecs / NSEC_PER_USEC;
ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
secs, usecs);
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
new file mode 100644
index 0000000..1647f28
--- /dev/null
+++ b/tools/perf/util/drv_configs.c
@@ -0,0 +1,77 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+
+static int
+perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ bool found = false;
+ int err = 0;
+ struct perf_evsel_config_term *term;
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ list_for_each_entry(term, &evsel->config_terms, list) {
+ if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+ continue;
+
+ /*
+ * We have a configuration term, report an error if we
+ * can't find the PMU or if the PMU driver doesn't support
+ * cmd line driver configuration.
+ */
+ if (!found || !pmu->set_drv_config) {
+ err = -EINVAL;
+ *err_term = term;
+ break;
+ }
+
+ err = pmu->set_drv_config(term);
+ if (err) {
+ *err_term = term;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = perf_evsel__apply_drv_configs(evsel, err_term);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+
+ return err;
+}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
new file mode 100644
index 0000000..32bc9ba
--- /dev/null
+++ b/tools/perf/util/drv_configs.h
@@ -0,0 +1,26 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DRV_CONFIGS_H
+#define __PERF_DRV_CONFIGS_H
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **term);
+#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 774f6ec..d2c6cdd 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -363,6 +363,9 @@
return -EINVAL;
}
+ if (!is_regular_file(name))
+ return -EINVAL;
+
fd = do_open(name);
free(name);
return fd;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index a347b19..41e068e 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -130,6 +130,22 @@
}
/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attiribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/**
* die_compare_name - Compare diename and tname
* @dw_die: a DIE
* @tname: a string of target name
@@ -145,18 +161,26 @@
}
/**
- * die_match_name - Match diename and glob
+ * die_match_name - Match diename/linkage name and glob
* @dw_die: a DIE
* @glob: a string of target glob pattern
*
* Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also match linkage name.
*/
bool die_match_name(Dwarf_Die *dw_die, const char *glob)
{
const char *name;
name = dwarf_diename(dw_die);
- return name ? strglobmatch(name, glob) : false;
+ if (name && strglobmatch(name, glob))
+ return true;
+ /* fall back to check linkage name */
+ name = die_get_linkage_name(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+
+ return false;
}
/**
@@ -1085,3 +1109,182 @@
return -ENOTSUP;
}
#endif
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+ Dwarf_Attribute loc;
+ int tag = dwarf_tag(vr_die);
+
+ if (tag != DW_TAG_formal_parameter &&
+ tag != DW_TAG_variable)
+ return false;
+
+ return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) &&
+ dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * For any object in given CU whose DW_AT_location is a location list,
+ * target program is compiled with optimization. This is applicable to
+ * clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+ Dwarf_Die tmp_die;
+
+ if (die_has_loclist(cu_die))
+ return true;
+
+ if (!dwarf_child(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ if (!dwarf_siblingof(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ return false;
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Search for @addr by looping over every lines of CU. If address
+ * matches, set index of that line in @idx. Note that single source
+ * line can have multiple line records. i.e. single source line can
+ * have multiple index.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+ Dwarf_Addr addr, unsigned long *idx)
+{
+ unsigned long i;
+ Dwarf_Addr tmp;
+
+ for (i = 0; i < nr_lines; i++) {
+ if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp))
+ return false;
+
+ if (tmp == addr) {
+ *idx = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @hignpc: high PC address of function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for prologue-end marker. If there is no explicit marker, return
+ * address of next line record or next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+ Dwarf_Lines *lines,
+ unsigned long nr_lines,
+ Dwarf_Addr highpc,
+ Dwarf_Addr *postprologue_addr)
+{
+ unsigned long i;
+ int entrypc_lno, lno;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ bool p_end;
+
+ /* entrypc_lno is actual source line number */
+ line = dwarf_onesrcline(lines, entrypc_idx);
+ if (dwarf_lineno(line, &entrypc_lno))
+ return false;
+
+ for (i = entrypc_idx; i < nr_lines; i++) {
+ line = dwarf_onesrcline(lines, i);
+
+ if (dwarf_lineaddr(line, &addr) ||
+ dwarf_lineno(line, &lno) ||
+ dwarf_lineprologueend(line, &p_end))
+ return false;
+
+ /* highpc is exclusive. [entrypc,highpc) */
+ if (addr >= highpc)
+ break;
+
+ /* clang supports prologue-end marker */
+ if (p_end)
+ break;
+
+ /* Actual next line in source */
+ if (lno != entrypc_lno)
+ break;
+
+ /*
+ * Single source line can have multiple line records.
+ * For Example,
+ * void foo() { printf("hello\n"); }
+ * contains two line records. One points to declaration and
+ * other points to printf() line. Variable 'lno' won't get
+ * incremented in this case but 'i' will.
+ */
+ if (i != entrypc_idx)
+ break;
+ }
+
+ dwarf_lineaddr(line, postprologue_addr);
+ if (*postprologue_addr >= highpc)
+ dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+ postprologue_addr);
+
+ return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * Function prologue prepares stack and registers before executing function
+ * logic. When target program is compiled without optimization, function
+ * parameter information is only valid after prologue. When we probe entrypc
+ * of the function, and try to record function parameter, it contains
+ * garbage value.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc)
+{
+ size_t nr_lines = 0;
+ unsigned long entrypc_idx = 0;
+ Dwarf_Lines *lines = NULL;
+ Dwarf_Addr postprologue_addr;
+ Dwarf_Addr highpc;
+
+ if (dwarf_highpc(sp_die, &highpc))
+ return;
+
+ if (dwarf_getsrclines(cu_die, &lines, &nr_lines))
+ return;
+
+ if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx))
+ return;
+
+ if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+ highpc, &postprologue_addr))
+ return;
+
+ *entrypc = postprologue_addr;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index dc0ce1a..8ac53bf 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -38,6 +38,9 @@
int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
int (*callback)(Dwarf_Die *, void *), void *data);
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
/* Ensure that this DIE is a subprogram and definition (not declaration) */
bool die_is_func_def(Dwarf_Die *dw_die);
@@ -125,4 +128,12 @@
/* Get the name and type of given variable DIE, stored as "type\tname" */
int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc);
+
#endif
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
new file mode 100644
index 0000000..62bc4a8
--- /dev/null
+++ b/tools/perf/util/dwarf-regs.c
@@ -0,0 +1,59 @@
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <util.h>
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+ switch (machine) {
+ case EM_NONE: /* Generic arch - use host arch */
+ return get_arch_regstr(n);
+ case EM_386:
+ return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+ case EM_X86_64:
+ return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+ case EM_ARM:
+ return __get_dwarf_regstr(arm_regstr_tbl, n);
+ case EM_AARCH64:
+ return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+ case EM_SH:
+ return __get_dwarf_regstr(sh_regstr_tbl, n);
+ case EM_S390:
+ return __get_dwarf_regstr(s390_regstr_tbl, n);
+ case EM_PPC:
+ case EM_PPC64:
+ return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+ case EM_SPARC:
+ case EM_SPARCV9:
+ return __get_dwarf_regstr(sparc_regstr_tbl, n);
+ case EM_XTENSA:
+ return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+ default:
+ pr_err("ELF MACHINE %x is not supported.\n", machine);
+ }
+ return NULL;
+}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e20438b..8ab0d7d 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,5 +1,6 @@
#include <linux/types.h>
-#include <sys/mman.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
#include "event.h"
#include "debug.h"
#include "hist.h"
@@ -248,6 +249,8 @@
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
+ const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+ int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
if (machine__is_default_guest(machine))
return 0;
@@ -343,6 +346,12 @@
if (!strcmp(execname, ""))
strcpy(execname, anonstr);
+ if (hugetlbfs_mnt_len &&
+ !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+ strcpy(execname, anonstr);
+ event->mmap2.flags |= MAP_HUGETLB;
+ }
+
size = strlen(execname) + 1;
memcpy(event->mmap2.filename, execname, size);
size = PERF_ALIGN(size, sizeof(u64));
@@ -1286,7 +1295,7 @@
* must be done prior to using kernel maps.
*/
if (load_map)
- map__load(al->map, machine->symbol_filter);
+ map__load(al->map);
al->addr = al->map->map_ip(al->map, al->addr);
}
}
@@ -1297,8 +1306,7 @@
{
thread__find_addr_map(thread, cpumode, type, addr, al);
if (al->map != NULL)
- al->sym = map__find_symbol(al->map, al->addr,
- thread->mg->machine->symbol_filter);
+ al->sym = map__find_symbol(al->map, al->addr);
else
al->sym = NULL;
}
@@ -1359,8 +1367,7 @@
al->filtered |= (1 << HIST_FILTER__DSO);
}
- al->sym = map__find_symbol(al->map, al->addr,
- machine->symbol_filter);
+ al->sym = map__find_symbol(al->map, al->addr);
}
if (symbol_conf.sym_list &&
@@ -1416,5 +1423,5 @@
al->sym = NULL;
if (al->map)
- al->sym = map__find_symbol(al->map, al->addr, NULL);
+ al->sym = map__find_symbol(al->map, al->addr);
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 097b3ed..ea34c5a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1032,16 +1032,18 @@
}
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
- struct mmap_params *mp, int cpu,
+ struct mmap_params *mp, int cpu_idx,
int thread, int *_output, int *_output_backward)
{
struct perf_evsel *evsel;
int revent;
+ int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
evlist__for_each_entry(evlist, evsel) {
struct perf_mmap *maps = evlist->mmap;
int *output = _output;
int fd;
+ int cpu;
if (evsel->attr.write_backward) {
output = _output_backward;
@@ -1060,6 +1062,10 @@
if (evsel->system_wide && thread)
continue;
+ cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
+ if (cpu == -1)
+ continue;
+
fd = FD(evsel, cpu, thread);
if (*output == -1) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 21fd573..380e84c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1045,15 +1045,15 @@
return -1;
}
-int perf_evsel__append_filter(struct perf_evsel *evsel,
- const char *op, const char *filter)
+static int perf_evsel__append_filter(struct perf_evsel *evsel,
+ const char *fmt, const char *filter)
{
char *new_filter;
if (evsel->filter == NULL)
return perf_evsel__set_filter(evsel, filter);
- if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) {
+ if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
free(evsel->filter);
evsel->filter = new_filter;
return 0;
@@ -1062,6 +1062,16 @@
return -1;
}
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "%s,%s", filter);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
int nthreads = thread_map__nr(evsel->threads);
@@ -1728,7 +1738,6 @@
data->cpu = data->pid = data->tid = -1;
data->stream_id = data->id = data->time = -1ULL;
data->period = evsel->attr.sample_period;
- data->weight = 0;
data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -1935,7 +1944,6 @@
}
}
- data->weight = 0;
if (type & PERF_SAMPLE_WEIGHT) {
OVERFLOW_CHECK_u64(array);
data->weight = *array;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4d44129..b1503b0 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@
PERF_EVSEL__CONFIG_TERM_INHERIT,
PERF_EVSEL__CONFIG_TERM_MAX_STACK,
PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+ PERF_EVSEL__CONFIG_TERM_DRV_CFG,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -57,6 +58,7 @@
u64 freq;
bool time;
char *callgraph;
+ char *drv_cfg;
u64 stack_user;
int max_stack;
bool inherit;
@@ -233,8 +235,9 @@
bool use_sample_identifier);
int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
-int perf_evsel__append_filter(struct perf_evsel *evsel,
- const char *op, const char *filter);
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel,
+ const char *filter);
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 3674e77..662a0a6 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -122,9 +122,6 @@
if (!node)
break;
- if (node->sym && node->sym->ignore)
- goto next;
-
printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
if (print_ip)
@@ -158,7 +155,7 @@
if (!print_oneline)
printed += fprintf(fp, "\n");
-next:
+
callchain_cursor_advance(cursor);
}
}
@@ -181,7 +178,7 @@
if (cursor != NULL) {
printed += sample__fprintf_callchain(sample, left_alignment,
print_opts, cursor, fp);
- } else if (!(al->sym && al->sym->ignore)) {
+ } else {
printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
if (print_ip)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8f0db40..85dd0db 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -828,8 +828,7 @@
* default get_cpuid(): nothing gets recorded
* actual implementation must be in arch/$(ARCH)/util/header.c
*/
-int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused,
- size_t sz __maybe_unused)
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
{
return -1;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index de15dbc..b02992e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -177,8 +177,10 @@
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
- if (h->srcline)
- hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline));
+ if (h->srcline) {
+ len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+ hists__new_col_len(hists, HISTC_SRCLINE, len);
+ }
if (h->srcfile)
hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
@@ -417,6 +419,8 @@
}
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
+ he->hroot_in = RB_ROOT;
+ he->hroot_out = RB_ROOT;
if (!symbol_conf.report_hierarchy)
he->leaf = true;
@@ -2149,6 +2153,50 @@
return he;
}
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *pair)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ struct perf_hpp_fmt *fmt;
+
+ p = &root->rb_node;
+ while (*p != NULL) {
+ int64_t cmp = 0;
+
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, he, pair);
+ if (cmp)
+ break;
+ }
+ if (!cmp)
+ goto out;
+
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ he = hist_entry__new(pair, true);
+ if (he) {
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, root);
+
+ he->dummy = true;
+ he->hists = hists;
+ memset(&he->stat, 0, sizeof(he->stat));
+ hists__inc_stats(hists, he);
+ }
+out:
+ return he;
+}
+
static struct hist_entry *hists__find_entry(struct hists *hists,
struct hist_entry *he)
{
@@ -2174,6 +2222,51 @@
return NULL;
}
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
+ struct hist_entry *he)
+{
+ struct rb_node *n = root->rb_node;
+
+ while (n) {
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ iter = rb_entry(n, struct hist_entry, rb_node_in);
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, iter, he);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void hists__match_hierarchy(struct rb_root *leader_root,
+ struct rb_root *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ pair = hists__find_hierarchy_entry(other_root, pos);
+
+ if (pair) {
+ hist_entry__add_pair(pair, pos);
+ hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in);
+ }
+ }
+}
+
/*
* Look for pairs to link to the leader buckets (hist_entries):
*/
@@ -2183,6 +2276,12 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__match_hierarchy(&leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
if (hists__has(leader, need_collapse))
root = &leader->entries_collapsed;
else
@@ -2197,6 +2296,50 @@
}
}
+static int hists__link_hierarchy(struct hists *leader_hists,
+ struct hist_entry *parent,
+ struct rb_root *leader_root,
+ struct rb_root *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *leader;
+
+ for (nd = rb_first(other_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(pos)) {
+ bool found = false;
+
+ list_for_each_entry(leader, &pos->pairs.head, pairs.node) {
+ if (leader->hists == leader_hists) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return -1;
+ } else {
+ leader = add_dummy_hierarchy_entry(leader_hists,
+ leader_root, pos);
+ if (leader == NULL)
+ return -1;
+
+ /* do not point parent in the pos */
+ leader->parent_he = parent;
+
+ hist_entry__add_pair(pos, leader);
+ }
+
+ if (!pos->leaf) {
+ if (hists__link_hierarchy(leader_hists, leader,
+ &leader->hroot_in,
+ &pos->hroot_in) < 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
/*
* Look for entries in the other hists that are not present in the leader, if
* we find them, just add a dummy entry on the leader hists, with period=0,
@@ -2208,6 +2351,13 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__link_hierarchy(leader, NULL,
+ &leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
if (hists__has(other, need_collapse))
root = &other->entries_collapsed;
else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0a1edf1..9928fed 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -230,7 +230,7 @@
struct perf_hpp_fmt {
const char *name;
int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists);
+ struct hists *hists, int line, int *span);
int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hists *hists);
int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -259,6 +259,7 @@
struct list_head fields;
struct list_head sorts;
+ int nr_header_lines;
int need_collapse;
int parent;
int sym;
@@ -367,6 +368,7 @@
void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
typedef u64 (*hpp_field_fn)(struct hist_entry *he);
typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
@@ -483,5 +485,10 @@
#define HIERARCHY_INDENT 3
bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 07c644e..43bfd8d 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -3,6 +3,12 @@
#ifdef HAVE_DWARF_SUPPORT
const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
#endif
#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 749e6f2..f545ec1 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -346,7 +346,7 @@
goto out_put;
/* Load maps to ensure dso->is_64_bit has been updated */
- map__load(al.map, machine->symbol_filter);
+ map__load(al.map);
x86_64 = al.map->dso->is_64_bit;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8ff6c6a..7591a0c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -80,6 +80,7 @@
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
@@ -186,6 +187,7 @@
decoder->get_trace = params->get_trace;
decoder->walk_insn = params->walk_insn;
+ decoder->pgd_ip = params->pgd_ip;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
@@ -1008,6 +1010,19 @@
int err;
err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN &&
+ decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ (decoder->state.type & INTEL_PT_BRANCH) &&
+ decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+ /* Unconditional branch leaving filter region */
+ decoder->no_progress = 0;
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
if (err == INTEL_PT_RETURN)
return 0;
if (err)
@@ -1036,6 +1051,21 @@
}
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+ intel_pt_insn.rel;
+
+ if (decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ decoder->pgd_ip(to_ip, decoder->data)) {
+ /* Conditional branch leaving filter region */
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->ip = to_ip;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 02c38fe..8939998 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -83,6 +83,7 @@
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
bool return_compression;
uint64_t period;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 551ff6f..dc041d4 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -103,6 +103,9 @@
unsigned max_non_turbo_ratio;
unsigned long num_events;
+
+ char *filter;
+ struct addr_filters filts;
};
enum switch_state {
@@ -241,7 +244,7 @@
}
queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
+next:
buffer = auxtrace_buffer__next(queue, buffer);
if (!buffer) {
if (old_buffer)
@@ -264,9 +267,6 @@
intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
return -ENOMEM;
- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
-
if (buffer->use_data) {
b->len = buffer->use_size;
b->buf = buffer->use_data;
@@ -276,6 +276,16 @@
}
b->ref_timestamp = buffer->reference;
+ /*
+ * If in snapshot mode and the buffer has no usable data, get next
+ * buffer and again check overlap against old_buffer.
+ */
+ if (ptq->pt->snapshot_mode && !b->len)
+ goto next;
+
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+
if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
!buffer->consecutive)) {
b->consecutive = false;
@@ -477,7 +487,7 @@
start_ip = *ip;
/* Load maps to ensure dso->is_64_bit has been updated */
- map__load(al.map, machine->symbol_filter);
+ map__load(al.map);
x86_64 = al.map->dso->is_64_bit;
@@ -541,6 +551,76 @@
return 0;
}
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+ uint64_t offset, const char *filename)
+{
+ struct addr_filter *filt;
+ bool have_filter = false;
+ bool hit_tracestop = false;
+ bool hit_filter = false;
+
+ list_for_each_entry(filt, &pt->filts.head, list) {
+ if (filt->start)
+ have_filter = true;
+
+ if ((filename && !filt->filename) ||
+ (!filename && filt->filename) ||
+ (filename && strcmp(filename, filt->filename)))
+ continue;
+
+ if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+ continue;
+
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+ ip, offset, filename ? filename : "[kernel]",
+ filt->start ? "filter" : "stop",
+ filt->addr, filt->size);
+
+ if (filt->start)
+ hit_filter = true;
+ else
+ hit_tracestop = true;
+ }
+
+ if (!hit_tracestop && !hit_filter)
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+ ip, offset, filename ? filename : "[kernel]");
+
+ return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct thread *thread;
+ struct addr_location al;
+ u8 cpumode;
+ u64 offset;
+
+ if (ip >= ptq->pt->kernel_start)
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = ptq->thread;
+ if (!thread)
+ return -EINVAL;
+
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+ if (!al.map || !al.map->dso)
+ return -EINVAL;
+
+ offset = al.map->map_ip(al.map, ip);
+
+ return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
+ al.map->dso->long_name);
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
static bool intel_pt_get_config(struct intel_pt *pt,
struct perf_event_attr *attr, u64 *config)
{
@@ -717,6 +797,9 @@
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+ if (pt->filts.cnt > 0)
+ params.pgd_ip = intel_pt_pgd_ip;
+
if (pt->synth_opts.instructions) {
if (pt->synth_opts.period) {
switch (pt->synth_opts.period_type) {
@@ -1294,7 +1377,7 @@
if (!map)
return 0;
- if (map__load(map, machine->symbol_filter))
+ if (map__load(map))
return 0;
start = dso__first_symbol(map->dso, MAP__FUNCTION);
@@ -1767,6 +1850,8 @@
intel_pt_free_events(session);
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->filter);
free(pt);
}
@@ -2016,6 +2101,8 @@
[INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
[INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
[INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+ [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n",
+ [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n",
};
static void intel_pt_print_info(u64 *arr, int start, int finish)
@@ -2029,12 +2116,28 @@
fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, " %-20s%s\n", name, str ? str : "");
+}
+
+static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
+{
+ return auxtrace_info->header.size >=
+ sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
+}
+
int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
struct intel_pt *pt;
+ void *info_end;
+ u64 *info;
int err;
if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
@@ -2045,6 +2148,8 @@
if (!pt)
return -ENOMEM;
+ addr_filters__init(&pt->filts);
+
perf_config(intel_pt_perf_config, pt);
err = auxtrace_queues__init(&pt->queues);
@@ -2069,8 +2174,7 @@
intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
INTEL_PT_PER_CPU_MMAPS);
- if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
- (sizeof(u64) * INTEL_PT_CYC_BIT)) {
+ if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
@@ -2080,6 +2184,54 @@
INTEL_PT_CYC_BIT);
}
+ if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+ pt->max_non_turbo_ratio =
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_MAX_NONTURBO_RATIO);
+ }
+
+ info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+ info_end = (void *)info + auxtrace_info->header.size;
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+ size_t len;
+
+ len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_FILTER_STR_LEN,
+ INTEL_PT_FILTER_STR_LEN);
+ if (len) {
+ const char *filter = (const char *)info;
+
+ len = roundup(len + 1, 8);
+ info += len >> 3;
+ if ((void *)info > info_end) {
+ pr_err("%s: bad filter string length\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ pt->filter = memdup(filter, len);
+ if (!pt->filter) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+ if (session->header.needs_swap)
+ mem_bswap_64(pt->filter, len);
+ if (pt->filter[len - 1]) {
+ pr_err("%s: filter string not null terminated\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = addr_filters__parse_bare_filter(&pt->filts,
+ filter);
+ if (err)
+ goto err_free_queues;
+ }
+ intel_pt_print_info_str("Filter string", pt->filter);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
pt->have_tsc = intel_pt_have_tsc(pt);
pt->sampling_mode = false;
@@ -2121,11 +2273,13 @@
pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
if (!pt->switch_evsel) {
pr_err("%s: missing sched_switch event\n", __func__);
+ err = -EINVAL;
goto err_delete_thread;
}
} else if (pt->have_sched_switch == 2 &&
!intel_pt_find_switch(session->evlist)) {
pr_err("%s: missing context_switch attribute flag\n", __func__);
+ err = -EINVAL;
goto err_delete_thread;
}
@@ -2149,7 +2303,9 @@
if (pt->tc.time_mult) {
u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
- pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
+ if (!pt->max_non_turbo_ratio)
+ pt->max_non_turbo_ratio =
+ (tsc_freq + 50000000) / 100000000;
intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
intel_pt_log("Maximum non-turbo ratio %u\n",
pt->max_non_turbo_ratio);
@@ -2193,6 +2349,8 @@
auxtrace_queues__free(&pt->queues);
session->auxtrace = NULL;
err_free:
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->filter);
free(pt);
return err;
}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
index 0065949..e13b14e 100644
--- a/tools/perf/util/intel-pt.h
+++ b/tools/perf/util/intel-pt.h
@@ -34,11 +34,11 @@
INTEL_PT_TSC_CTC_N,
INTEL_PT_TSC_CTC_D,
INTEL_PT_CYC_BIT,
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_FILTER_STR_LEN,
INTEL_PT_AUXTRACE_PRIV_MAX,
};
-#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
-
struct auxtrace_record;
struct perf_tool;
union perf_event;
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index 95a1acb..9ddea5c 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -29,6 +29,7 @@
lzma_action action = LZMA_RUN;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_ret ret;
+ int err = -1;
u8 buf_in[BUFSIZE];
u8 buf_out[BUFSIZE];
@@ -45,7 +46,7 @@
if (ret != LZMA_OK) {
pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
lzma_strerror(ret), ret);
- return -1;
+ goto err_fclose;
}
strm.next_in = NULL;
@@ -60,7 +61,7 @@
if (ferror(infile)) {
pr_err("lzma: read error: %s\n", strerror(errno));
- return -1;
+ goto err_fclose;
}
if (feof(infile))
@@ -74,7 +75,7 @@
if (writen(output_fd, buf_out, write_size) != write_size) {
pr_err("lzma: write error: %s\n", strerror(errno));
- return -1;
+ goto err_fclose;
}
strm.next_out = buf_out;
@@ -83,13 +84,15 @@
if (ret != LZMA_OK) {
if (ret == LZMA_STREAM_END)
- return 0;
+ break;
pr_err("lzma: failed %s\n", lzma_strerror(ret));
- return -1;
+ goto err_fclose;
}
}
+ err = 0;
+err_fclose:
fclose(infile);
- return 0;
+ return err;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index cb6388d..18e4519 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -41,7 +41,6 @@
machine->pid = pid;
- machine->symbol_filter = NULL;
machine->id_hdr_size = 0;
machine->kptr_restrict_warned = false;
machine->comm_exec = false;
@@ -148,7 +147,6 @@
{
machine__init(&machines->host, "", HOST_KERNEL_ID);
machines->guests = RB_ROOT;
- machines->symbol_filter = NULL;
}
void machines__exit(struct machines *machines)
@@ -172,8 +170,6 @@
return NULL;
}
- machine->symbol_filter = machines->symbol_filter;
-
while (*p != NULL) {
parent = *p;
pos = rb_entry(parent, struct machine, rb_node);
@@ -189,21 +185,6 @@
return machine;
}
-void machines__set_symbol_filter(struct machines *machines,
- symbol_filter_t symbol_filter)
-{
- struct rb_node *nd;
-
- machines->symbol_filter = symbol_filter;
- machines->host.symbol_filter = symbol_filter;
-
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
- struct machine *machine = rb_entry(nd, struct machine, rb_node);
-
- machine->symbol_filter = symbol_filter;
- }
-}
-
void machines__set_comm_exec(struct machines *machines, bool comm_exec)
{
struct rb_node *nd;
@@ -916,10 +897,10 @@
}
int __machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, bool no_kcore, symbol_filter_t filter)
+ enum map_type type, bool no_kcore)
{
struct map *map = machine__kernel_map(machine);
- int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter);
+ int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore);
if (ret > 0) {
dso__set_loaded(map->dso, type);
@@ -935,16 +916,15 @@
}
int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, symbol_filter_t filter)
+ enum map_type type)
{
- return __machine__load_kallsyms(machine, filename, type, false, filter);
+ return __machine__load_kallsyms(machine, filename, type, false);
}
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
- symbol_filter_t filter)
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
{
struct map *map = machine__kernel_map(machine);
- int ret = dso__load_vmlinux_path(map->dso, map, filter);
+ int ret = dso__load_vmlinux_path(map->dso, map);
if (ret > 0)
dso__set_loaded(map->dso, type);
@@ -1313,7 +1293,7 @@
/*
* preload dso of guest kernel and modules
*/
- dso__load(kernel, machine__kernel_map(machine), NULL);
+ dso__load(kernel, machine__kernel_map(machine));
}
}
return 0;
@@ -2115,7 +2095,7 @@
*/
machine->kernel_start = 1ULL << 63;
if (map) {
- err = map__load(map, machine->symbol_filter);
+ err = map__load(map);
if (map->start)
machine->kernel_start = map->start;
}
@@ -2131,7 +2111,7 @@
{
struct machine *machine = vmachine;
struct map *map;
- struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL);
+ struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map);
if (sym == NULL)
return NULL;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 20739f7..354de6e 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -41,7 +41,6 @@
struct map_groups kmaps;
struct map *vmlinux_maps[MAP__NR_TYPES];
u64 kernel_start;
- symbol_filter_t symbol_filter;
pid_t *current_tid;
union { /* Tool specific area */
void *priv;
@@ -110,7 +109,6 @@
struct machines {
struct machine host;
struct rb_root guests;
- symbol_filter_t symbol_filter;
};
void machines__init(struct machines *machines);
@@ -128,8 +126,6 @@
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
-void machines__set_symbol_filter(struct machines *machines,
- symbol_filter_t symbol_filter);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
struct machine *machine__new_host(void);
@@ -178,40 +174,33 @@
static inline
struct symbol *machine__find_kernel_symbol(struct machine *machine,
enum map_type type, u64 addr,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
- return map_groups__find_symbol(&machine->kmaps, type, addr,
- mapp, filter);
+ return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
}
static inline
struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
enum map_type type, const char *name,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
- return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
- mapp, filter);
+ return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
}
static inline
struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
- mapp, filter);
+ mapp);
}
static inline
struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
const char *name,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
- return map_groups__find_function_by_name(&machine->kmaps, name, mapp,
- filter);
+ return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
}
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
@@ -219,11 +208,10 @@
int arch__fix_module_text_start(u64 *start, const char *name);
int __machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, bool no_kcore, symbol_filter_t filter);
+ enum map_type type, bool no_kcore);
int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, symbol_filter_t filter);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
- symbol_filter_t filter);
+ enum map_type type);
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 728129a..c662fef 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <stdio.h>
#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include "map.h"
#include "thread.h"
#include "strlist.h"
@@ -24,9 +25,10 @@
[MAP__VARIABLE] = "Variables",
};
-static inline int is_anon_memory(const char *filename)
+static inline int is_anon_memory(const char *filename, u32 flags)
{
- return !strcmp(filename, "//anon") ||
+ return flags & MAP_HUGETLB ||
+ !strcmp(filename, "//anon") ||
!strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
!strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
}
@@ -155,7 +157,7 @@
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
- anon = is_anon_memory(filename);
+ anon = is_anon_memory(filename, flags);
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
@@ -279,7 +281,7 @@
#define DSO__DELETED "(deleted)"
-int map__load(struct map *map, symbol_filter_t filter)
+int map__load(struct map *map)
{
const char *name = map->dso->long_name;
int nr;
@@ -287,7 +289,7 @@
if (dso__loaded(map->dso, map->type))
return 0;
- nr = dso__load(map->dso, map, filter);
+ nr = dso__load(map->dso, map);
if (nr < 0) {
if (map->dso->has_build_id) {
char sbuild_id[SBUILD_ID_SIZE];
@@ -312,9 +314,6 @@
pr_warning("%.*s was updated (is prelink enabled?). "
"Restart the long running apps that use it!\n",
(int)real_len, name);
- } else if (filter) {
- pr_warning("no symbols passed the given filter.\n");
- return -2; /* Empty but maybe by the filter */
} else {
pr_warning("no symbols found in %s, maybe install "
"a debug package?\n", name);
@@ -331,19 +330,17 @@
return strcmp(namea, nameb);
}
-struct symbol *map__find_symbol(struct map *map, u64 addr,
- symbol_filter_t filter)
+struct symbol *map__find_symbol(struct map *map, u64 addr)
{
- if (map__load(map, filter) < 0)
+ if (map__load(map) < 0)
return NULL;
return dso__find_symbol(map->dso, map->type, addr);
}
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
- symbol_filter_t filter)
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
{
- if (map__load(map, filter) < 0)
+ if (map__load(map) < 0)
return NULL;
if (!dso__sorted_by_name(map->dso, map->type))
@@ -556,23 +553,22 @@
struct symbol *map_groups__find_symbol(struct map_groups *mg,
enum map_type type, u64 addr,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
struct map *map = map_groups__find(mg, type, addr);
/* Ensure map is loaded before using map->map_ip */
- if (map != NULL && map__load(map, filter) >= 0) {
+ if (map != NULL && map__load(map) >= 0) {
if (mapp != NULL)
*mapp = map;
- return map__find_symbol(map, map->map_ip(map, addr), filter);
+ return map__find_symbol(map, map->map_ip(map, addr));
}
return NULL;
}
struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
- struct map **mapp, symbol_filter_t filter)
+ struct map **mapp)
{
struct symbol *sym;
struct rb_node *nd;
@@ -582,7 +578,7 @@
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
- sym = map__find_symbol_by_name(pos, name, filter);
+ sym = map__find_symbol_by_name(pos, name);
if (sym == NULL)
continue;
@@ -600,15 +596,14 @@
struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
enum map_type type,
const char *name,
- struct map **mapp,
- symbol_filter_t filter)
+ struct map **mapp)
{
- struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp, filter);
+ struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp);
return sym;
}
-int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
+int map_groups__find_ams(struct addr_map_symbol *ams)
{
if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
if (ams->map->groups == NULL)
@@ -620,7 +615,7 @@
}
ams->al_addr = ams->map->map_ip(ams->map, ams->addr);
- ams->sym = map__find_symbol(ams->map, ams->al_addr, filter);
+ ams->sym = map__find_symbol(ams->map, ams->al_addr);
return ams->sym ? 0 : -1;
}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d83396c..abdacf8 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -127,17 +127,14 @@
* @map: the 'struct map *' in which symbols itereated
* @sym_name: the symbol name
* @pos: the 'struct symbol *' to use as a loop cursor
- * @filter: to use when loading the DSO
*/
-#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \
- for (pos = map__find_symbol_by_name(map, sym_name, filter); \
+#define __map__for_each_symbol_by_name(map, sym_name, pos) \
+ for (pos = map__find_symbol_by_name(map, sym_name); \
pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \
pos = symbol__next_by_name(pos))
#define map__for_each_symbol_by_name(map, sym_name, pos) \
- __map__for_each_symbol_by_name(map, sym_name, (pos), NULL)
-
-typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+ __map__for_each_symbol_by_name(map, sym_name, (pos))
int arch__compare_symbol_names(const char *namea, const char *nameb);
void map__init(struct map *map, enum map_type type,
@@ -173,11 +170,9 @@
int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
FILE *fp);
-int map__load(struct map *map, symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *map,
- u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
- symbol_filter_t filter);
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
void map__fixup_start(struct map *map);
void map__fixup_end(struct map *map);
@@ -191,7 +186,7 @@
struct map *maps__first(struct maps *maps);
struct map *map__next(struct map *map);
struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
- struct map **mapp, symbol_filter_t filter);
+ struct map **mapp);
void map_groups__init(struct map_groups *mg, struct machine *machine);
void map_groups__exit(struct map_groups *mg);
int map_groups__clone(struct thread *thread,
@@ -231,25 +226,22 @@
struct symbol *map_groups__find_symbol(struct map_groups *mg,
enum map_type type, u64 addr,
- struct map **mapp,
- symbol_filter_t filter);
+ struct map **mapp);
struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
enum map_type type,
const char *name,
- struct map **mapp,
- symbol_filter_t filter);
+ struct map **mapp);
struct addr_map_symbol;
-int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter);
+int map_groups__find_ams(struct addr_map_symbol *ams);
static inline
struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
- const char *name, struct map **mapp,
- symbol_filter_t filter)
+ const char *name, struct map **mapp)
{
- return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp, filter);
+ return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
}
int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6c913c3..33546c3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -904,6 +904,7 @@
[PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
[PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
+ [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
};
static bool config_term_shrinked;
@@ -1034,7 +1035,8 @@
struct parse_events_term *term,
struct parse_events_error *err)
{
- if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER)
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
+ term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG)
/*
* Always succeed for sysfs terms, as we dont know
* at this point what type they need to have.
@@ -1134,6 +1136,9 @@
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
break;
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
+ break;
default:
break;
}
@@ -1755,20 +1760,49 @@
static int set_filter(struct perf_evsel *evsel, const void *arg)
{
const char *str = arg;
+ bool found = false;
+ int nr_addr_filters = 0;
+ struct perf_pmu *pmu = NULL;
- if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
- fprintf(stderr,
- "--filter option should follow a -e tracepoint option\n");
- return -1;
+ if (evsel == NULL)
+ goto err;
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+ if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
}
- if (perf_evsel__append_filter(evsel, "&&", str) < 0) {
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ perf_pmu__scan_file(pmu, "nr_addr_filters",
+ "%d", &nr_addr_filters);
+
+ if (!nr_addr_filters)
+ goto err;
+
+ if (perf_evsel__append_addr_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
}
return 0;
+
+err:
+ fprintf(stderr,
+ "--filter option should follow a -e tracepoint or HW tracer option\n");
+
+ return -1;
}
int parse_filter(const struct option *opt, const char *str,
@@ -1793,7 +1827,7 @@
snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
- if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) {
+ if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d1edbf8..8d09a97 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@
PARSE_EVENTS__TERM_TYPE_MAX_STACK,
PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
PARSE_EVENTS__TERM_TYPE_OVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_DRV_CFG,
__PARSE_EVENTS__TERM_TYPE_NR,
};
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7a25194..9f43fda 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,6 +53,26 @@
return token;
}
+/*
+ * This function is called when the parser gets two kind of input:
+ *
+ * @cfg1 or @cfg2=config
+ *
+ * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to
+ * bison. In the latter case it is necessary to keep the string intact so that
+ * the PMU kernel driver can determine what configurable is associated to
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ /* Strip off the '@' */
+ yylval->str = strdup(text + 1);
+ return token;
+}
+
#define REWIND(__alloc) \
do { \
YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
@@ -124,6 +144,7 @@
num_raw_hex [a-fA-F0-9]+
name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/* If you add a modifier you need to update check_modifier() */
modifier_event [ukhpPGHSDI]+
modifier_bp [rwx]{1,3}
@@ -209,6 +230,7 @@
{name_minus} { return str(yyscanner, PE_NAME); }
\[all\] { return PE_ARRAY_ALL; }
"[" { BEGIN(array); return '['; }
+@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
}
<mem>{
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 5be4a5f..879115f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -49,6 +49,7 @@
%token PE_ERROR
%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
%token PE_ARRAY_ALL PE_ARRAY_RANGE
+%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
@@ -63,6 +64,7 @@
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_DRV_CFG_TERM
%type <num> value_sym
%type <head> event_config
%type <head> opt_event_config
@@ -599,6 +601,15 @@
term->array = $2;
$$ = term;
}
+|
+PE_DRV_CFG_TERM
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+ $1, $1, &@1, NULL));
+ $$ = term;
+}
array:
'[' array_terms ']'
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ddb0261..2babcdf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -445,14 +445,23 @@
FILE *file;
struct cpu_map *cpus;
const char *sysfs = sysfs__mountpoint();
+ const char *templates[] = {
+ "%s/bus/event_source/devices/%s/cpumask",
+ "%s/bus/event_source/devices/%s/cpus",
+ NULL
+ };
+ const char **template;
if (!sysfs)
return NULL;
- snprintf(path, PATH_MAX,
- "%s/bus/event_source/devices/%s/cpumask", sysfs, name);
+ for (template = templates; *template; template++) {
+ snprintf(path, PATH_MAX, *template, sysfs, name);
+ if (stat(path, &st) == 0)
+ break;
+ }
- if (stat(path, &st) < 0)
+ if (!*template)
return NULL;
file = fopen(path, "r");
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5d7e844..743422a 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
#include <linux/bitmap.h>
#include <linux/perf_event.h>
#include <stdbool.h>
+#include "evsel.h"
#include "parse-events.h"
enum {
@@ -25,6 +26,7 @@
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
struct list_head list; /* ELEM */
+ int (*set_drv_config) (struct perf_evsel_config_term *term);
};
struct perf_pmu_info {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2873396..fcfbef0 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -110,13 +110,12 @@
static struct symbol *__find_kernel_function_by_name(const char *name,
struct map **mapp)
{
- return machine__find_kernel_function_by_name(host_machine, name, mapp,
- NULL);
+ return machine__find_kernel_function_by_name(host_machine, name, mapp);
}
static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
{
- return machine__find_kernel_function(host_machine, addr, mapp, NULL);
+ return machine__find_kernel_function(host_machine, addr, mapp);
}
static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
@@ -125,7 +124,7 @@
struct kmap *kmap;
struct map *map = machine__kernel_map(host_machine);
- if (map__load(map, NULL) < 0)
+ if (map__load(map) < 0)
return NULL;
kmap = map__kmap(map);
@@ -214,9 +213,13 @@
goto out;
}
- ptr2 = strpbrk(ptr1, "-._");
- if (ptr2)
- *ptr2 = '\0';
+ for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) {
+ if (!isalnum(*ptr2) && *ptr2 != '_') {
+ *ptr2 = '\0';
+ break;
+ }
+ }
+
ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
if (ret < 0)
goto out;
@@ -351,9 +354,9 @@
vmlinux_name = symbol_conf.vmlinux_name;
dso->load_errno = 0;
if (vmlinux_name)
- ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+ ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
else
- ret = dso__load_vmlinux_path(dso, map, NULL);
+ ret = dso__load_vmlinux_path(dso, map);
found:
*pdso = dso;
return ret;
@@ -674,6 +677,10 @@
char *tmp;
int i, skipped = 0;
+ /* Skip post process if the target is an offline kernel */
+ if (symbol_conf.ignore_vmlinux_buildid)
+ return 0;
+
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
@@ -1614,19 +1621,27 @@
return ret;
}
+/* Returns true if *any* ARG is either C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+ int i = 0;
+
+ for (i = 0; i < pev->nargs; i++)
+ if (is_c_varname(pev->args[i].var) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_VARS))
+ return true;
+ return false;
+}
+
/* Return true if this perf_probe_event requires debuginfo */
bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
{
- int i;
-
if (pev->point.file || pev->point.line || pev->point.lazy_line)
return true;
- for (i = 0; i < pev->nargs; i++)
- if (is_c_varname(pev->args[i].var) ||
- !strcmp(pev->args[i].var, "$params") ||
- !strcmp(pev->args[i].var, "$vars"))
- return true;
+ if (perf_probe_with_var(pev))
+ return true;
return false;
}
@@ -1987,7 +2002,7 @@
map = dso__new_map(tp->module);
if (!map)
goto out;
- sym = map__find_symbol(map, addr, NULL);
+ sym = map__find_symbol(map, addr);
} else {
if (tp->symbol && !addr) {
if (kernel_get_symbol_address_by_name(tp->symbol,
@@ -2692,7 +2707,7 @@
struct symbol *sym;
struct rb_node *tmp;
- if (map__load(map, NULL) < 0)
+ if (map__load(map) < 0)
return 0;
map__for_each_symbol(map, sym, tmp) {
@@ -3207,6 +3222,52 @@
return 0;
}
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+ char *buf = synthesize_probe_trace_command(tev);
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ /* Showing definition always go stdout */
+ printf("%s\n", buf);
+ free(buf);
+
+ return 0;
+}
+
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+ struct strlist *namelist = strlist__new(NULL, NULL);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ int i, j, ret = 0;
+
+ if (!namelist)
+ return -ENOMEM;
+
+ for (j = 0; j < npevs && !ret; j++) {
+ pev = &pevs[j];
+ for (i = 0; i < pev->ntevs && !ret; i++) {
+ tev = &pev->tevs[i];
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev,
+ namelist, true);
+ if (!ret)
+ ret = show_probe_trace_event(tev);
+ }
+ }
+ strlist__delete(namelist);
+
+ return ret;
+}
+
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
{
int i, ret = 0;
@@ -3289,24 +3350,10 @@
return ret;
}
-/* TODO: don't use a global variable for filter ... */
-static struct strfilter *available_func_filter;
-
-/*
- * If a symbol corresponds to a function with global binding and
- * matches filter return 0. For all others return 1.
- */
-static int filter_available_functions(struct map *map __maybe_unused,
- struct symbol *sym)
-{
- if (strfilter__compare(available_func_filter, sym->name))
- return 0;
- return 1;
-}
-
int show_available_funcs(const char *target, struct strfilter *_filter,
bool user)
{
+ struct rb_node *nd;
struct map *map;
int ret;
@@ -3324,9 +3371,7 @@
return -EINVAL;
}
- /* Load symbols with given filter */
- available_func_filter = _filter;
- ret = map__load(map, filter_available_functions);
+ ret = map__load(map);
if (ret) {
if (ret == -2) {
char *str = strfilter__string(_filter);
@@ -3343,7 +3388,14 @@
/* Show all (filtered) symbols */
setup_pager();
- dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
+
+ for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+ struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+
+ if (strfilter__compare(_filter, pos->sym.name))
+ printf("%s\n", pos->sym.name);
+ }
+
end:
if (user) {
map__put(map);
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index f4f45db..8091d15 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -128,6 +128,8 @@
int perf_probe_event__copy(struct perf_probe_event *dst,
struct perf_probe_event *src);
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
/* Check the perf_probe_event needs debuginfo */
bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
@@ -147,6 +149,7 @@
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int del_perf_probe_events(struct strfilter *filter);
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 9c3b9ed..436b647 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -73,11 +73,10 @@
static int open_probe_events(const char *trace_file, bool readwrite)
{
char buf[PATH_MAX];
- const char *tracing_dir = "";
int ret;
- ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
- tracing_path, tracing_dir, trace_file);
+ ret = e_snprintf(buf, PATH_MAX, "%s/%s",
+ tracing_path, trace_file);
if (ret >= 0) {
pr_debug("Opening %s write=%d\n", buf, readwrite);
if (readwrite && !probe_event_dry_run)
@@ -700,7 +699,7 @@
INIT_LIST_HEAD(&sdtlist);
ret = get_sdt_note_list(&sdtlist, pathname);
if (ret < 0) {
- pr_debug("Failed to get sdt note: %d\n", ret);
+ pr_debug4("Failed to get sdt note: %d\n", ret);
return ret;
}
list_for_each_entry(note, &sdtlist, note_list) {
@@ -877,3 +876,60 @@
return 0;
}
+
+static struct {
+ const char *pattern;
+ bool avail;
+ bool checked;
+} probe_type_table[] = {
+#define DEFINE_TYPE(idx, pat, def_avail) \
+ [idx] = {.pattern = pat, .avail = (def_avail)}
+ DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true),
+ DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true),
+ DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false),
+ DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true),
+ DEFINE_TYPE(PROBE_TYPE_BITFIELD,
+ "* b<bit-width>@<bit-offset>/<container-size>", true),
+};
+
+bool probe_type_is_available(enum probe_type type)
+{
+ FILE *fp;
+ char *buf = NULL;
+ size_t len = 0;
+ bool target_line = false;
+ bool ret = probe_type_table[type].avail;
+
+ if (type >= PROBE_TYPE_END)
+ return false;
+ /* We don't have to check the type which supported by default */
+ if (ret || probe_type_table[type].checked)
+ return ret;
+
+ if (asprintf(&buf, "%s/README", tracing_path) < 0)
+ return ret;
+
+ fp = fopen(buf, "r");
+ if (!fp)
+ goto end;
+
+ zfree(&buf);
+ while (getline(&buf, &len, fp) > 0 && !ret) {
+ if (!target_line) {
+ target_line = !!strstr(buf, " type: ");
+ if (!target_line)
+ continue;
+ } else if (strstr(buf, "\t ") != buf)
+ break;
+ ret = strglobmatch(buf, probe_type_table[type].pattern);
+ }
+ /* Cache the result */
+ probe_type_table[type].checked = true;
+ probe_type_table[type].avail = ret;
+
+ fclose(fp);
+end:
+ free(buf);
+
+ return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 9577b5c..eba44c3 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -19,6 +19,15 @@
struct list_head entries;
};
+enum probe_type {
+ PROBE_TYPE_U = 0,
+ PROBE_TYPE_S,
+ PROBE_TYPE_X,
+ PROBE_TYPE_STRING,
+ PROBE_TYPE_BITFIELD,
+ PROBE_TYPE_END,
+};
+
#define PF_FL_UPROBE 1
#define PF_FL_RW 2
#define for_each_probe_cache_entry(entry, pcache) \
@@ -54,6 +63,7 @@
struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
const char *group, const char *event);
int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
#else /* ! HAVE_LIBELF_SUPPORT */
static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused)
{
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 5c290c6..df4debe 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -39,6 +39,7 @@
#include "util.h"
#include "symbol.h"
#include "probe-finder.h"
+#include "probe-file.h"
/* Kprobe tracer basic type is up to u64 */
#define MAX_BASIC_TYPE_BITS 64
@@ -170,6 +171,7 @@
*/
static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+ unsigned int machine,
struct probe_trace_arg *tvar)
{
Dwarf_Attribute attr;
@@ -265,7 +267,7 @@
if (!tvar)
return ret2;
- regs = get_arch_regstr(regn);
+ regs = get_dwarf_regstr(regn, machine);
if (!regs) {
/* This should be a bug in DWARF or this tool */
pr_warning("Mapping for the register number %u "
@@ -297,13 +299,13 @@
char sbuf[STRERR_BUFSIZE];
int bsize, boffs, total;
int ret;
- char sign;
+ char prefix;
/* TODO: check all types */
- if (cast && strcmp(cast, "string") != 0 &&
+ if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 &&
strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
/* Non string type is OK */
- /* and respect signedness cast */
+ /* and respect signedness/hexadecimal cast */
tvar->type = strdup(cast);
return (tvar->type == NULL) ? -ENOMEM : 0;
}
@@ -365,11 +367,15 @@
}
if (cast && (strcmp(cast, "u") == 0))
- sign = 'u';
+ prefix = 'u';
else if (cast && (strcmp(cast, "s") == 0))
- sign = 's';
+ prefix = 's';
+ else if (cast && (strcmp(cast, "x") == 0) &&
+ probe_type_is_available(PROBE_TYPE_X))
+ prefix = 'x';
else
- sign = die_is_signed_type(&type) ? 's' : 'u';
+ prefix = die_is_signed_type(&type) ? 's' :
+ probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
ret = dwarf_bytesize(&type);
if (ret <= 0)
@@ -383,7 +389,7 @@
dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
ret = MAX_BASIC_TYPE_BITS;
}
- ret = snprintf(buf, 16, "%c%d", sign, ret);
+ ret = snprintf(buf, 16, "%c%d", prefix, ret);
formatted:
if (ret < 0 || ret >= 16) {
@@ -538,7 +544,7 @@
dwarf_diename(vr_die));
ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
- &pf->sp_die, pf->tvar);
+ &pf->sp_die, pf->machine, pf->tvar);
if (ret == -ENOENT || ret == -EINVAL) {
pr_err("Failed to find the location of the '%s' variable at this address.\n"
" Perhaps it has been optimized out.\n"
@@ -901,6 +907,38 @@
return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
}
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct perf_probe_point *pp = &pf->pev->point;
+
+ /* Not uprobe? */
+ if (!pf->pev->uprobes)
+ return;
+
+ /* Compiled with optimization? */
+ if (die_is_optimized_target(&pf->cu_die))
+ return;
+
+ /* Don't know entrypc? */
+ if (!pf->addr)
+ return;
+
+ /* Only FUNC and FUNC@SRC are eligible. */
+ if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+ pp->offset || pp->abs_address)
+ return;
+
+ /* Not interested in func parameter? */
+ if (!perf_probe_with_var(pf->pev))
+ return;
+
+ pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+ "Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+ pf->addr);
+
+ die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
{
struct probe_finder *pf = data;
@@ -917,6 +955,11 @@
dwarf_diename(in_die));
return -ENOENT;
}
+ if (addr == 0) {
+ pr_debug("%s has no valid entry address. skipped.\n",
+ dwarf_diename(in_die));
+ return -ENOENT;
+ }
pf->addr = addr;
pf->addr += pp->offset;
pr_debug("found inline addr: 0x%jx\n",
@@ -950,7 +993,8 @@
if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
return DWARF_CB_OK;
- pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
+ pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+ (unsigned long)dwarf_dieoffset(sp_die));
pf->fname = dwarf_decl_file(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
@@ -959,10 +1003,16 @@
} else if (die_is_func_instance(sp_die)) {
/* Instances always have the entry address */
dwarf_entrypc(sp_die, &pf->addr);
+ /* But in some case the entry address is 0 */
+ if (pf->addr == 0) {
+ pr_debug("%s has no entry PC. Skipped\n",
+ dwarf_diename(sp_die));
+ param->retval = 0;
/* Real function */
- if (pp->lazy_line)
+ } else if (pp->lazy_line)
param->retval = find_probe_point_lazy(sp_die, pf);
else {
+ skip_prologue(sp_die, pf);
pf->addr += pp->offset;
/* TODO: Check the address in this function */
param->retval = call_probe_finder(sp_die, pf);
@@ -972,7 +1022,7 @@
param->retval = die_walk_instances(sp_die,
probe_point_inline_cb, (void *)pf);
/* This could be a non-existed inline definition */
- if (param->retval == -ENOENT && strisglob(pp->function))
+ if (param->retval == -ENOENT)
param->retval = 0;
}
@@ -1101,11 +1151,8 @@
struct probe_finder *pf)
{
int ret = 0;
-
-#if _ELFUTILS_PREREQ(0, 142)
Elf *elf;
GElf_Ehdr ehdr;
- GElf_Shdr shdr;
if (pf->cfi_eh || pf->cfi_dbg)
return debuginfo__find_probe_location(dbg, pf);
@@ -1118,11 +1165,18 @@
if (gelf_getehdr(elf, &ehdr) == NULL)
return -EINVAL;
- if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
- shdr.sh_type == SHT_PROGBITS)
- pf->cfi_eh = dwarf_getcfi_elf(elf);
+ pf->machine = ehdr.e_machine;
- pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+#if _ELFUTILS_PREREQ(0, 142)
+ do {
+ GElf_Shdr shdr;
+
+ if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+ shdr.sh_type == SHT_PROGBITS)
+ pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+ pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+ } while (0);
#endif
ret = debuginfo__find_probe_location(dbg, pf);
@@ -1150,7 +1204,7 @@
(tag == DW_TAG_variable && vf->vars)) {
if (convert_variable_location(die_mem, vf->pf->addr,
vf->pf->fb_ops, &pf->sp_die,
- NULL) == 0) {
+ pf->machine, NULL) == 0) {
vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
if (vf->args[vf->nargs].var == NULL) {
vf->ret = -ENOMEM;
@@ -1313,7 +1367,7 @@
tag == DW_TAG_variable) {
ret = convert_variable_location(die_mem, af->pf.addr,
af->pf.fb_ops, &af->pf.sp_die,
- NULL);
+ af->pf.machine, NULL);
if (ret == 0 || ret == -ERANGE) {
int ret2;
bool externs = !af->child;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 51137fc..f1d8558 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -80,6 +80,7 @@
Dwarf_CFI *cfi_dbg;
#endif
Dwarf_Op *fb_ops; /* Frame base attribute */
+ unsigned int machine; /* Target machine arch */
struct perf_probe_arg *pvar; /* Current target variable */
struct probe_trace_arg *tvar; /* Current result variable */
};
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 5d1eb1c..e55a132 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -25,6 +25,7 @@
#include <ctype.h>
#include <errno.h>
#include <linux/bitmap.h>
+#include <linux/time64.h>
#include "../util.h"
#include <EXTERN.h>
@@ -359,8 +360,8 @@
if (!test_and_set_bit(event->id, events_defined))
define_event_symbols(event, handler, event->print_fmt.args);
- s = nsecs / NSECS_PER_SEC;
- ns = nsecs - s * NSECS_PER_SEC;
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
scripting_context->event_data = data;
scripting_context->pevent = evsel->tp_format->pevent;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e0203b9..089438d 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -27,6 +27,7 @@
#include <stdbool.h>
#include <errno.h>
#include <linux/bitmap.h>
+#include <linux/time64.h>
#include "../../perf.h"
#include "../debug.h"
@@ -426,8 +427,8 @@
if (!dict)
Py_FatalError("couldn't create Python dict");
}
- s = nsecs / NSECS_PER_SEC;
- ns = nsecs - s * NSECS_PER_SEC;
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
scripting_context->event_data = data;
scripting_context->pevent = evsel->tp_format->pevent;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 3d3cb83..452e15a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -11,7 +11,7 @@
regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
-const char default_sort_order[] = "comm,dso,symbol";
+const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
@@ -867,7 +867,7 @@
};
/* --sort daddr_sym */
-static int64_t
+int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -896,7 +896,7 @@
width);
}
-static int64_t
+int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1062,7 +1062,7 @@
return repsep_snprintf(bf, size, "%-*s", width, out);
}
-static int64_t
+int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
{
u64 l, r;
@@ -1492,7 +1492,8 @@
}
static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists)
+ struct hists *hists, int line __maybe_unused,
+ int *span __maybe_unused)
{
struct hpp_sort_entry *hse;
size_t len = fmt->user_len;
@@ -1797,7 +1798,9 @@
}
static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists __maybe_unused)
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
@@ -2305,9 +2308,9 @@
return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
}
-static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
- struct perf_evlist *evlist,
- int level)
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level)
{
unsigned int i;
@@ -2682,7 +2685,7 @@
}
}
-static int output_field_add(struct perf_hpp_list *list, char *tok)
+int output_field_add(struct perf_hpp_list *list, char *tok)
{
unsigned int i;
@@ -2745,7 +2748,7 @@
return ret;
}
-static void reset_dimensions(void)
+void reset_dimensions(void)
{
unsigned int i;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7ca37ea..099c975 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -28,7 +28,7 @@
extern const char *field_order;
extern const char default_parent_pattern[];
extern const char *parent_pattern;
-extern const char default_sort_order[];
+extern const char *default_sort_order;
extern regex_t ignore_callees_regex;
extern int have_ignore_callees;
extern enum sort_mode sort__mode;
@@ -40,6 +40,7 @@
extern struct sort_entry sort_dso_to;
extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
extern enum sort_type sort__first_dimension;
extern const char default_mem_sort_order[];
@@ -268,4 +269,15 @@
bool is_strict_order(const char *order);
int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index eec6c11..1cbada2 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -18,6 +18,7 @@
#include <unistd.h>
#include <string.h>
#include <linux/bitmap.h>
+#include <linux/time64.h>
#include "perf.h"
#include "svghelper.h"
@@ -274,14 +275,14 @@
text[0] = 0;
- if (duration < 1000) /* less than 1 usec */
+ if (duration < NSEC_PER_USEC) /* less than 1 usec */
return text;
- if (duration < 1000 * 1000) { /* less than 1 msec */
- sprintf(text, "%.1f us", duration / 1000.0);
+ if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+ sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
return text;
}
- sprintf(text, "%.1f ms", duration / 1000.0 / 1000);
+ sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);
return text;
}
@@ -297,7 +298,7 @@
style = "waiting";
- if (end-start > 10 * 1000000) /* 10 msec */
+ if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
style = "WAITING";
text = time_to_string(end-start);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a811c13..99400b0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -206,6 +206,37 @@
return NULL;
}
+static bool want_demangle(bool is_kernel_sym)
+{
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+ char *demangled = NULL;
+
+ /*
+ * We need to figure out if the object was created from C++ sources
+ * DWARF DW_compile_unit has this, but we don't always have access
+ * to it...
+ */
+ if (!want_demangle(dso->kernel || kmodule))
+ return demangled;
+
+ demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+ if (demangled == NULL)
+ demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+ else if (rust_is_mangled(demangled))
+ /*
+ * Input to Rust demangling is the BFD-demangled
+ * name which it Rust-demangles in place.
+ */
+ rust_demangle_sym(demangled);
+
+ return demangled;
+}
+
#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
idx < nr_entries; \
@@ -223,8 +254,7 @@
* And always look at the original dso, not at debuginfo packages, that
* have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
*/
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map,
- symbol_filter_t filter)
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
{
uint32_t nr_rel_entries, idx;
GElf_Sym sym;
@@ -301,45 +331,53 @@
elf_section__for_each_rela(reldata, pos, pos_mem, idx,
nr_rel_entries) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
symidx = GELF_R_SYM(pos->r_info);
plt_offset += shdr_plt.sh_entsize;
gelf_getsym(syms, symidx, &sym);
+
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
snprintf(sympltname, sizeof(sympltname),
- "%s@plt", elf_sym__name(&sym, symstrs));
+ "%s@plt", elf_name);
+ free(demangled);
f = symbol__new(plt_offset, shdr_plt.sh_entsize,
STB_GLOBAL, sympltname);
if (!f)
goto out_elf_end;
- if (filter && filter(map, f))
- symbol__delete(f);
- else {
- symbols__insert(&dso->symbols[map->type], f);
- ++nr;
- }
+ symbols__insert(&dso->symbols[map->type], f);
+ ++nr;
}
} else if (shdr_rel_plt.sh_type == SHT_REL) {
GElf_Rel pos_mem, *pos;
elf_section__for_each_rel(reldata, pos, pos_mem, idx,
nr_rel_entries) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
symidx = GELF_R_SYM(pos->r_info);
plt_offset += shdr_plt.sh_entsize;
gelf_getsym(syms, symidx, &sym);
+
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
snprintf(sympltname, sizeof(sympltname),
- "%s@plt", elf_sym__name(&sym, symstrs));
+ "%s@plt", elf_name);
+ free(demangled);
f = symbol__new(plt_offset, shdr_plt.sh_entsize,
STB_GLOBAL, sympltname);
if (!f)
goto out_elf_end;
- if (filter && filter(map, f))
- symbol__delete(f);
- else {
- symbols__insert(&dso->symbols[map->type], f);
- ++nr;
- }
+ symbols__insert(&dso->symbols[map->type], f);
+ ++nr;
}
}
@@ -685,7 +723,7 @@
}
/* Always reject images with a mismatched build-id: */
- if (dso->has_build_id) {
+ if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
u8 build_id[BUILD_ID_SIZE];
if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
@@ -775,17 +813,11 @@
return 0;
}
-static bool want_demangle(bool is_kernel_sym)
-{
- return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
-}
-
void __weak arch__sym_update(struct symbol *s __maybe_unused,
GElf_Sym *sym __maybe_unused) { }
-int dso__load_sym(struct dso *dso, struct map *map,
- struct symsrc *syms_ss, struct symsrc *runtime_ss,
- symbol_filter_t filter, int kmodule)
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule)
{
struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL;
@@ -1070,29 +1102,10 @@
sym.st_value -= shdr.sh_addr - shdr.sh_offset;
}
new_symbol:
- /*
- * We need to figure out if the object was created from C++ sources
- * DWARF DW_compile_unit has this, but we don't always have access
- * to it...
- */
- if (want_demangle(dso->kernel || kmodule)) {
- int demangle_flags = DMGL_NO_OPTS;
- if (verbose)
- demangle_flags = DMGL_PARAMS | DMGL_ANSI;
+ demangled = demangle_sym(dso, kmodule, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
- demangled = bfd_demangle(NULL, elf_name, demangle_flags);
- if (demangled == NULL)
- demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
- else if (rust_is_mangled(demangled))
- /*
- * Input to Rust demangling is the BFD-demangled
- * name which it Rust-demangles in place.
- */
- rust_demangle_sym(demangled);
-
- if (demangled != NULL)
- elf_name = demangled;
- }
f = symbol__new(sym.st_value, sym.st_size,
GELF_ST_BIND(sym.st_info), elf_name);
free(demangled);
@@ -1101,21 +1114,16 @@
arch__sym_update(f, &sym);
- if (filter && filter(curr_map, f))
- symbol__delete(f);
- else {
- symbols__insert(&curr_dso->symbols[curr_map->type], f);
- nr++;
- }
+ __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+ nr++;
}
/*
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- if (!symbol_conf.allow_aliases)
- symbols__fixup_duplicate(&dso->symbols[map->type]);
symbols__fixup_end(&dso->symbols[map->type]);
+ symbols__fixup_duplicate(&dso->symbols[map->type]);
if (kmap) {
/*
* We need to fixup this here too because we create new
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 4890633..11cdde9 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -287,8 +287,7 @@
int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
struct symsrc *ss __maybe_unused,
- struct map *map __maybe_unused,
- symbol_filter_t filter __maybe_unused)
+ struct map *map __maybe_unused)
{
return 0;
}
@@ -334,7 +333,6 @@
int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
struct symsrc *ss,
struct symsrc *runtime_ss __maybe_unused,
- symbol_filter_t filter __maybe_unused,
int kmodule __maybe_unused)
{
unsigned char build_id[BUILD_ID_SIZE];
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 37e8d20..aecff69 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -9,6 +9,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
+#include "annotate.h"
#include "build-id.h"
#include "util.h"
#include "debug.h"
@@ -23,10 +24,10 @@
#include <symbol/kallsyms.h>
#include <sys/utsname.h>
-static int dso__load_kernel_sym(struct dso *dso, struct map *map,
- symbol_filter_t filter);
-static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
- symbol_filter_t filter);
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
int vmlinux_path__nr_entries;
char **vmlinux_path;
@@ -152,6 +153,9 @@
struct rb_node *nd;
struct symbol *curr, *next;
+ if (symbol_conf.allow_aliases)
+ return;
+
nd = rb_first(symbols);
while (nd) {
@@ -235,8 +239,13 @@
if (sym == NULL)
return NULL;
- if (symbol_conf.priv_size)
+ if (symbol_conf.priv_size) {
+ if (symbol_conf.init_annotation) {
+ struct annotation *notes = (void *)sym;
+ pthread_mutex_init(¬es->lock, NULL);
+ }
sym = ((void *)sym) + symbol_conf.priv_size;
+ }
sym->start = start;
sym->end = len ? start + len : start;
@@ -268,13 +277,24 @@
}
}
-void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
{
struct rb_node **p = &symbols->rb_node;
struct rb_node *parent = NULL;
const u64 ip = sym->start;
struct symbol *s;
+ if (kernel) {
+ const char *name = sym->name;
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
+ sym->idle = symbol__is_idle(name);
+ }
+
while (*p != NULL) {
parent = *p;
s = rb_entry(parent, struct symbol, rb_node);
@@ -287,6 +307,11 @@
rb_insert_color(&sym->rb_node, symbols);
}
+void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+{
+ __symbols__insert(symbols, sym, false);
+}
+
static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
{
struct rb_node *n;
@@ -320,6 +345,16 @@
return NULL;
}
+static struct symbol *symbols__last(struct rb_root *symbols)
+{
+ struct rb_node *n = rb_last(symbols);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
static struct symbol *symbols__next(struct symbol *sym)
{
struct rb_node *n = rb_next(&sym->rb_node);
@@ -415,7 +450,7 @@
void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
{
- symbols__insert(&dso->symbols[type], sym);
+ __symbols__insert(&dso->symbols[type], sym, dso->kernel);
/* update the symbol cache if necessary */
if (dso->last_find_result[type].addr >= sym->start &&
@@ -441,6 +476,11 @@
return symbols__first(&dso->symbols[type]);
}
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+{
+ return symbols__last(&dso->symbols[type]);
+}
+
struct symbol *dso__next_symbol(struct symbol *sym)
{
return symbols__next(sym);
@@ -537,7 +577,7 @@
* These are symbols in the kernel image, so make sure that
* sym is from a kernel DSO.
*/
-bool symbol__is_idle(struct symbol *sym)
+static bool symbol__is_idle(const char *name)
{
const char * const idle_symbols[] = {
"cpu_idle",
@@ -554,14 +594,10 @@
"pseries_dedicated_idle_sleep",
NULL
};
-
int i;
- if (!sym)
- return false;
-
for (i = 0; idle_symbols[i]; i++) {
- if (!strcmp(idle_symbols[i], sym->name))
+ if (!strcmp(idle_symbols[i], name))
return true;
}
@@ -590,7 +626,7 @@
* We will pass the symbols to the filter later, in
* map__split_kallsyms, when we have split the maps per module
*/
- symbols__insert(root, sym);
+ __symbols__insert(root, sym, !strchr(name, '['));
return 0;
}
@@ -607,8 +643,7 @@
return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
}
-static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
- symbol_filter_t filter)
+static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
{
struct map_groups *kmaps = map__kmaps(map);
struct map *curr_map;
@@ -637,7 +672,7 @@
curr_map = map_groups__find(kmaps, map->type, pos->start);
- if (!curr_map || (filter && filter(curr_map, pos))) {
+ if (!curr_map) {
symbol__delete(pos);
continue;
}
@@ -660,8 +695,7 @@
* kernel range is broken in several maps, named [kernel].N, as we don't have
* the original ELF section names vmlinux have.
*/
-static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
- symbol_filter_t filter)
+static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
{
struct map_groups *kmaps = map__kmaps(map);
struct machine *machine;
@@ -738,7 +772,7 @@
if (count == 0) {
curr_map = map;
- goto filter_symbol;
+ goto add_symbol;
}
if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -770,18 +804,18 @@
pos->start -= delta;
pos->end -= delta;
}
-filter_symbol:
- if (filter && filter(curr_map, pos)) {
-discard_symbol: rb_erase(&pos->rb_node, root);
- symbol__delete(pos);
- } else {
- if (curr_map != map) {
- rb_erase(&pos->rb_node, root);
- symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
- ++moved;
- } else
- ++count;
- }
+add_symbol:
+ if (curr_map != map) {
+ rb_erase(&pos->rb_node, root);
+ symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+ ++moved;
+ } else
+ ++count;
+
+ continue;
+discard_symbol:
+ rb_erase(&pos->rb_node, root);
+ symbol__delete(pos);
}
if (curr_map != map &&
@@ -1221,7 +1255,7 @@
}
int __dso__load_kallsyms(struct dso *dso, const char *filename,
- struct map *map, bool no_kcore, symbol_filter_t filter)
+ struct map *map, bool no_kcore)
{
u64 delta = 0;
@@ -1234,8 +1268,8 @@
if (kallsyms__delta(map, filename, &delta))
return -1;
- symbols__fixup_duplicate(&dso->symbols[map->type]);
symbols__fixup_end(&dso->symbols[map->type]);
+ symbols__fixup_duplicate(&dso->symbols[map->type]);
if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1243,19 +1277,18 @@
dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
if (!no_kcore && !dso__load_kcore(dso, map, filename))
- return dso__split_kallsyms_for_kcore(dso, map, filter);
+ return dso__split_kallsyms_for_kcore(dso, map);
else
- return dso__split_kallsyms(dso, map, delta, filter);
+ return dso__split_kallsyms(dso, map, delta);
}
int dso__load_kallsyms(struct dso *dso, const char *filename,
- struct map *map, symbol_filter_t filter)
+ struct map *map)
{
- return __dso__load_kallsyms(dso, filename, map, false, filter);
+ return __dso__load_kallsyms(dso, filename, map, false);
}
-static int dso__load_perf_map(struct dso *dso, struct map *map,
- symbol_filter_t filter)
+static int dso__load_perf_map(struct dso *dso, struct map *map)
{
char *line = NULL;
size_t n;
@@ -1297,12 +1330,8 @@
if (sym == NULL)
goto out_delete_line;
- if (filter && filter(map, sym))
- symbol__delete(sym);
- else {
- symbols__insert(&dso->symbols[map->type], sym);
- nr_syms++;
- }
+ symbols__insert(&dso->symbols[map->type], sym);
+ nr_syms++;
}
free(line);
@@ -1358,7 +1387,7 @@
}
}
-int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
+int dso__load(struct dso *dso, struct map *map)
{
char *name;
int ret = -1;
@@ -1381,9 +1410,9 @@
if (dso->kernel) {
if (dso->kernel == DSO_TYPE_KERNEL)
- ret = dso__load_kernel_sym(dso, map, filter);
+ ret = dso__load_kernel_sym(dso, map);
else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
- ret = dso__load_guest_kernel_sym(dso, map, filter);
+ ret = dso__load_guest_kernel_sym(dso, map);
goto out;
}
@@ -1407,7 +1436,7 @@
goto out;
}
- ret = dso__load_perf_map(dso, map, filter);
+ ret = dso__load_perf_map(dso, map);
dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
DSO_BINARY_TYPE__NOT_FOUND;
goto out;
@@ -1498,14 +1527,14 @@
kmod = true;
if (syms_ss)
- ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
+ ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
else
ret = -1;
if (ret > 0) {
int nr_plt;
- nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter);
+ nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
if (nr_plt > 0)
ret += nr_plt;
}
@@ -1544,8 +1573,7 @@
}
int dso__load_vmlinux(struct dso *dso, struct map *map,
- const char *vmlinux, bool vmlinux_allocated,
- symbol_filter_t filter)
+ const char *vmlinux, bool vmlinux_allocated)
{
int err = -1;
struct symsrc ss;
@@ -1565,7 +1593,7 @@
if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
return -1;
- err = dso__load_sym(dso, map, &ss, &ss, filter, 0);
+ err = dso__load_sym(dso, map, &ss, &ss, 0);
symsrc__destroy(&ss);
if (err > 0) {
@@ -1581,8 +1609,7 @@
return err;
}
-int dso__load_vmlinux_path(struct dso *dso, struct map *map,
- symbol_filter_t filter)
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
{
int i, err = 0;
char *filename = NULL;
@@ -1591,7 +1618,7 @@
vmlinux_path__nr_entries + 1);
for (i = 0; i < vmlinux_path__nr_entries; ++i) {
- err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter);
+ err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
if (err > 0)
goto out;
}
@@ -1599,7 +1626,7 @@
if (!symbol_conf.ignore_vmlinux_buildid)
filename = dso__build_id_filename(dso, NULL, 0);
if (filename != NULL) {
- err = dso__load_vmlinux(dso, map, filename, true, filter);
+ err = dso__load_vmlinux(dso, map, filename, true);
if (err > 0)
goto out;
free(filename);
@@ -1713,8 +1740,7 @@
return strdup(path);
}
-static int dso__load_kernel_sym(struct dso *dso, struct map *map,
- symbol_filter_t filter)
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
{
int err;
const char *kallsyms_filename = NULL;
@@ -1740,12 +1766,11 @@
}
if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
- return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name,
- false, filter);
+ return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
}
if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
- err = dso__load_vmlinux_path(dso, map, filter);
+ err = dso__load_vmlinux_path(dso, map);
if (err > 0)
return err;
}
@@ -1761,7 +1786,7 @@
kallsyms_filename = kallsyms_allocated_filename;
do_kallsyms:
- err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
if (err > 0)
pr_debug("Using %s for symbols\n", kallsyms_filename);
free(kallsyms_allocated_filename);
@@ -1776,8 +1801,7 @@
return err;
}
-static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
- symbol_filter_t filter)
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
{
int err;
const char *kallsyms_filename = NULL;
@@ -1799,7 +1823,7 @@
if (symbol_conf.default_guest_vmlinux_name != NULL) {
err = dso__load_vmlinux(dso, map,
symbol_conf.default_guest_vmlinux_name,
- false, filter);
+ false);
return err;
}
@@ -1811,7 +1835,7 @@
kallsyms_filename = path;
}
- err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
if (err > 0)
pr_debug("Using %s for symbols\n", kallsyms_filename);
if (err > 0 && !dso__is_kcore(dso)) {
@@ -1948,6 +1972,23 @@
return value;
}
+int symbol__annotation_init(void)
+{
+ if (symbol_conf.initialized) {
+ pr_err("Annotation needs to be init before symbol__init()\n");
+ return -1;
+ }
+
+ if (symbol_conf.init_annotation) {
+ pr_warning("Annotation being initialized multiple times\n");
+ return 0;
+ }
+
+ symbol_conf.priv_size += sizeof(struct annotation);
+ symbol_conf.init_annotation = true;
+ return 0;
+}
+
int symbol__init(struct perf_env *env)
{
const char *symfs;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 699f7cb..d964844 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,7 @@
u64 end;
u16 namelen;
u8 binding;
- bool ignore;
+ u8 idle:1;
u8 arch_sym;
char name[0];
};
@@ -88,6 +88,7 @@
unsigned short priv_size;
unsigned short nr_events;
bool try_vmlinux_path,
+ init_annotation,
force,
ignore_vmlinux,
ignore_vmlinux_buildid,
@@ -240,16 +241,13 @@
bool symsrc__has_symtab(struct symsrc *ss);
bool symsrc__possibly_runtime(struct symsrc *ss);
-int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter);
+int dso__load(struct dso *dso, struct map *map);
int dso__load_vmlinux(struct dso *dso, struct map *map,
- const char *vmlinux, bool vmlinux_allocated,
- symbol_filter_t filter);
-int dso__load_vmlinux_path(struct dso *dso, struct map *map,
- symbol_filter_t filter);
+ const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
- bool no_kcore, symbol_filter_t filter);
-int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
- symbol_filter_t filter);
+ bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
void dso__insert_symbol(struct dso *dso, enum map_type type,
struct symbol *sym);
@@ -261,6 +259,7 @@
struct symbol *symbol__next_by_name(struct symbol *sym);
struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
struct symbol *dso__next_symbol(struct symbol *sym);
enum dso_type dso__type_fd(int fd);
@@ -277,6 +276,8 @@
int symbol__init(struct perf_env *env);
void symbol__exit(void);
void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al,
@@ -291,16 +292,15 @@
bool symbol_type__is_a(char symbol_type, enum map_type map_type);
bool symbol__restricted_filename(const char *filename,
const char *restricted_filename);
-bool symbol__is_idle(struct symbol *sym);
int symbol__config_symfs(const struct option *opt __maybe_unused,
const char *dir, int unset __maybe_unused);
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
- struct symsrc *runtime_ss, symbol_filter_t filter,
- int kmodule);
+ struct symsrc *runtime_ss, int kmodule);
int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
- struct map *map, symbol_filter_t filter);
+ struct map *map);
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel);
void symbols__insert(struct rb_root *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root *symbols);
void symbols__fixup_end(struct rb_root *symbols);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index cee559d..85c5680 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -15,6 +15,7 @@
#include <byteswap.h>
#include <linux/kernel.h>
#include <linux/log2.h>
+#include <linux/time64.h>
#include <unistd.h>
#include "callchain.h"
#include "strlist.h"
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e5f5547..43899e0 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -179,10 +179,6 @@
#undef tolower
#undef toupper
-#ifndef NSEC_PER_MSEC
-#define NSEC_PER_MSEC 1000000L
-#endif
-
int parse_nsec_time(const char *str, u64 *ptime);
extern unsigned char sane_ctype[256];
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index e73a79f..bc82596 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -44,7 +44,6 @@
#include <acpi/acpi.h>
#include "accommon.h"
#include "acapps.h"
-#include <stdio.h>
#define _COMPONENT ACPI_TOOLS
ACPI_MODULE_NAME("cmfsize")
@@ -69,24 +68,24 @@
/* Save the current file pointer, seek to EOF to obtain file size */
- current_offset = acpi_os_get_file_offset(file);
+ current_offset = ftell(file);
if (current_offset < 0) {
goto offset_error;
}
- status = acpi_os_set_file_offset(file, 0, ACPI_FILE_END);
+ status = fseek(file, 0, SEEK_END);
if (ACPI_FAILURE(status)) {
goto seek_error;
}
- file_size = acpi_os_get_file_offset(file);
+ file_size = ftell(file);
if (file_size < 0) {
goto offset_error;
}
/* Restore original file pointer */
- status = acpi_os_set_file_offset(file, current_offset, ACPI_FILE_BEGIN);
+ status = fseek(file, current_offset, SEEK_SET);
if (ACPI_FAILURE(status)) {
goto seek_error;
}
@@ -94,10 +93,10 @@
return ((u32)file_size);
offset_error:
- acpi_log_error("Could not get file offset");
+ fprintf(stderr, "Could not get file offset\n");
return (ACPI_UINT32_MAX);
seek_error:
- acpi_log_error("Could not set file offset");
+ fprintf(stderr, "Could not set file offset\n");
return (ACPI_UINT32_MAX);
}
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 0bd343f..3919970 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -57,7 +57,7 @@
#include "acapps.h"
#define ACPI_OPTION_ERROR(msg, badchar) \
- if (acpi_gbl_opterr) {acpi_log_error ("%s%c\n", msg, badchar);}
+ if (acpi_gbl_opterr) {fprintf (stderr, "%s%c\n", msg, badchar);}
int acpi_gbl_opterr = 1;
int acpi_gbl_optind = 1;
@@ -94,7 +94,7 @@
acpi_gbl_optarg =
&argv[acpi_gbl_optind++][(int)(current_char_ptr + 1)];
} else if (++acpi_gbl_optind >= argc) {
- ACPI_OPTION_ERROR("Option requires an argument: -", 'v');
+ ACPI_OPTION_ERROR("\nOption requires an argument", 0);
current_char_ptr = 1;
return (-1);
diff --git a/tools/power/acpi/os_specific/service_layers/oslibcfs.c b/tools/power/acpi/os_specific/service_layers/oslibcfs.c
deleted file mode 100644
index 11f4aba..0000000
--- a/tools/power/acpi/os_specific/service_layers/oslibcfs.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/******************************************************************************
- *
- * Module Name: oslibcfs - C library OSL for file I/O
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#include <acpi/acpi.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-#define _COMPONENT ACPI_OS_SERVICES
-ACPI_MODULE_NAME("oslibcfs")
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_open_file
- *
- * PARAMETERS: path - File path
- * modes - File operation type
- *
- * RETURN: File descriptor.
- *
- * DESCRIPTION: Open a file for reading (ACPI_FILE_READING) or/and writing
- * (ACPI_FILE_WRITING).
- *
- ******************************************************************************/
-ACPI_FILE acpi_os_open_file(const char *path, u8 modes)
-{
- ACPI_FILE file;
- u32 i = 0;
- char modes_str[4];
-
- if (modes & ACPI_FILE_READING) {
- modes_str[i++] = 'r';
- }
- if (modes & ACPI_FILE_WRITING) {
- modes_str[i++] = 'w';
- }
-
- if (modes & ACPI_FILE_BINARY) {
- modes_str[i++] = 'b';
- }
-
- modes_str[i++] = '\0';
-
- file = fopen(path, modes_str);
- if (!file) {
- perror("Could not open file");
- }
-
- return (file);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_close_file
- *
- * PARAMETERS: file - An open file descriptor
- *
- * RETURN: None.
- *
- * DESCRIPTION: Close a file opened via acpi_os_open_file.
- *
- ******************************************************************************/
-
-void acpi_os_close_file(ACPI_FILE file)
-{
-
- fclose(file);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_read_file
- *
- * PARAMETERS: file - An open file descriptor
- * buffer - Data buffer
- * size - Data block size
- * count - Number of data blocks
- *
- * RETURN: Number of bytes actually read.
- *
- * DESCRIPTION: Read from a file.
- *
- ******************************************************************************/
-
-int
-acpi_os_read_file(ACPI_FILE file, void *buffer, acpi_size size, acpi_size count)
-{
- int length;
-
- length = fread(buffer, size, count, file);
- if (length < 0) {
- perror("Error reading file");
- }
-
- return (length);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_write_file
- *
- * PARAMETERS: file - An open file descriptor
- * buffer - Data buffer
- * size - Data block size
- * count - Number of data blocks
- *
- * RETURN: Number of bytes actually written.
- *
- * DESCRIPTION: Write to a file.
- *
- ******************************************************************************/
-
-int
-acpi_os_write_file(ACPI_FILE file,
- void *buffer, acpi_size size, acpi_size count)
-{
- int length;
-
- length = fwrite(buffer, size, count, file);
- if (length < 0) {
- perror("Error writing file");
- }
-
- return (length);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_get_file_offset
- *
- * PARAMETERS: file - An open file descriptor
- *
- * RETURN: Current file pointer position.
- *
- * DESCRIPTION: Get current file offset.
- *
- ******************************************************************************/
-
-long acpi_os_get_file_offset(ACPI_FILE file)
-{
- long offset;
-
- offset = ftell(file);
- return (offset);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_os_set_file_offset
- *
- * PARAMETERS: file - An open file descriptor
- * offset - New file offset
- * from - From begin/end of file
- *
- * RETURN: Status
- *
- * DESCRIPTION: Set current file offset.
- *
- ******************************************************************************/
-
-acpi_status acpi_os_set_file_offset(ACPI_FILE file, long offset, u8 from)
-{
- int ret = 0;
-
- if (from == ACPI_FILE_BEGIN) {
- ret = fseek(file, offset, SEEK_SET);
- }
-
- if (from == ACPI_FILE_END) {
- ret = fseek(file, offset, SEEK_END);
- }
-
- if (ret < 0) {
- return (AE_ERROR);
- } else {
- return (AE_OK);
- }
-}
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 88aa66e..8d8003c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -63,10 +63,7 @@
#define _COMPONENT ACPI_OS_SERVICES
ACPI_MODULE_NAME("osunixxf")
-u8 acpi_gbl_debug_timeout = FALSE;
-
/* Upcalls to acpi_exec */
-
void
ae_table_override(struct acpi_table_header *existing_table,
struct acpi_table_header **new_table);
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
index 2942cdc..04b5db7 100644
--- a/tools/power/acpi/tools/acpidump/Makefile
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -36,12 +36,13 @@
utdebug.o\
utexcep.o\
utglobal.o\
+ uthex.o\
utmath.o\
utnonansi.o\
utprint.o\
utstring.o\
+ utstrtoul64.o\
utxferror.o\
- oslibcfs.o\
oslinuxtbl.o\
cmfsize.o\
getopt.o
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 025c232..00423fc 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -55,11 +55,7 @@
#include <acpi/acpi.h>
#include "accommon.h"
#include "actables.h"
-
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/stat.h>
+#include "acapps.h"
/* Globals */
@@ -72,12 +68,6 @@
EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL);
EXTERN u64 INIT_GLOBAL(gbl_rsdp_base, 0);
-/* Globals required for use with ACPICA modules */
-
-#ifdef _DECLARE_GLOBALS
-u8 acpi_gbl_integer_byte_width = 8;
-#endif
-
/* Action table used to defer requested options */
struct ap_dump_action {
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index fb8f1d9..9031be1 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -69,16 +69,17 @@
/* Make sure signature is all ASCII and a valid ACPI name */
if (!acpi_ut_valid_nameseg(table->signature)) {
- acpi_log_error("Table signature (0x%8.8X) is invalid\n",
- *(u32 *)table->signature);
+ fprintf(stderr,
+ "Table signature (0x%8.8X) is invalid\n",
+ *(u32 *)table->signature);
return (FALSE);
}
/* Check for minimum table length */
if (table->length < sizeof(struct acpi_table_header)) {
- acpi_log_error("Table length (0x%8.8X) is invalid\n",
- table->length);
+ fprintf(stderr, "Table length (0x%8.8X) is invalid\n",
+ table->length);
return (FALSE);
}
}
@@ -115,8 +116,8 @@
}
if (ACPI_FAILURE(status)) {
- acpi_log_error("%4.4s: Warning: wrong checksum in table\n",
- table->signature);
+ fprintf(stderr, "%4.4s: Warning: wrong checksum in table\n",
+ table->signature);
}
return (AE_OK);
@@ -195,13 +196,13 @@
* Note: simplest to just always emit a 64-bit address. acpi_xtract
* utility can handle this.
*/
- acpi_ut_file_printf(gbl_output_file, "%4.4s @ 0x%8.8X%8.8X\n",
- table->signature, ACPI_FORMAT_UINT64(address));
+ fprintf(gbl_output_file, "%4.4s @ 0x%8.8X%8.8X\n",
+ table->signature, ACPI_FORMAT_UINT64(address));
acpi_ut_dump_buffer_to_file(gbl_output_file,
ACPI_CAST_PTR(u8, table), table_length,
DB_BYTE_DISPLAY, 0);
- acpi_ut_file_printf(gbl_output_file, "\n");
+ fprintf(gbl_output_file, "\n");
return (0);
}
@@ -239,14 +240,14 @@
if (status == AE_LIMIT) {
return (0);
} else if (i == 0) {
- acpi_log_error
- ("Could not get ACPI tables, %s\n",
- acpi_format_exception(status));
+ fprintf(stderr,
+ "Could not get ACPI tables, %s\n",
+ acpi_format_exception(status));
return (-1);
} else {
- acpi_log_error
- ("Could not get ACPI table at index %u, %s\n",
- i, acpi_format_exception(status));
+ fprintf(stderr,
+ "Could not get ACPI table at index %u, %s\n",
+ i, acpi_format_exception(status));
continue;
}
}
@@ -286,20 +287,20 @@
/* Convert argument to an integer physical address */
- status = acpi_ut_strtoul64(ascii_address, ACPI_ANY_BASE,
- ACPI_MAX64_BYTE_WIDTH, &long_address);
+ status = acpi_ut_strtoul64(ascii_address, ACPI_STRTOUL_64BIT,
+ &long_address);
if (ACPI_FAILURE(status)) {
- acpi_log_error("%s: Could not convert to a physical address\n",
- ascii_address);
+ fprintf(stderr, "%s: Could not convert to a physical address\n",
+ ascii_address);
return (-1);
}
address = (acpi_physical_address)long_address;
status = acpi_os_get_table_by_address(address, &table);
if (ACPI_FAILURE(status)) {
- acpi_log_error("Could not get table at 0x%8.8X%8.8X, %s\n",
- ACPI_FORMAT_UINT64(address),
- acpi_format_exception(status));
+ fprintf(stderr, "Could not get table at 0x%8.8X%8.8X, %s\n",
+ ACPI_FORMAT_UINT64(address),
+ acpi_format_exception(status));
return (-1);
}
@@ -331,9 +332,9 @@
int table_status;
if (strlen(signature) != ACPI_NAME_SIZE) {
- acpi_log_error
- ("Invalid table signature [%s]: must be exactly 4 characters\n",
- signature);
+ fprintf(stderr,
+ "Invalid table signature [%s]: must be exactly 4 characters\n",
+ signature);
return (-1);
}
@@ -363,9 +364,9 @@
return (0);
}
- acpi_log_error
- ("Could not get ACPI table with signature [%s], %s\n",
- local_signature, acpi_format_exception(status));
+ fprintf(stderr,
+ "Could not get ACPI table with signature [%s], %s\n",
+ local_signature, acpi_format_exception(status));
return (-1);
}
@@ -408,24 +409,24 @@
}
if (!acpi_ut_valid_nameseg(table->signature)) {
- acpi_log_error
- ("No valid ACPI signature was found in input file %s\n",
- pathname);
+ fprintf(stderr,
+ "No valid ACPI signature was found in input file %s\n",
+ pathname);
}
/* File must be at least as long as the table length */
if (table->length > file_size) {
- acpi_log_error
- ("Table length (0x%X) is too large for input file (0x%X) %s\n",
- table->length, file_size, pathname);
+ fprintf(stderr,
+ "Table length (0x%X) is too large for input file (0x%X) %s\n",
+ table->length, file_size, pathname);
goto exit;
}
if (gbl_verbose_mode) {
- acpi_log_error
- ("Input file: %s contains table [%4.4s], 0x%X (%u) bytes\n",
- pathname, table->signature, file_size, file_size);
+ fprintf(stderr,
+ "Input file: %s contains table [%4.4s], 0x%X (%u) bytes\n",
+ pathname, table->signature, file_size, file_size);
}
table_status = ap_dump_table_buffer(table, 0, 0);
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 5fcd970..dd5b861 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -42,7 +42,6 @@
*/
#include "acpidump.h"
-#include "acapps.h"
/* Local prototypes */
@@ -66,7 +65,8 @@
struct stat stat_info;
if (!stat(pathname, &stat_info)) {
- acpi_log_error("Target path already exists, overwrite? [y|n] ");
+ fprintf(stderr,
+ "Target path already exists, overwrite? [y|n] ");
if (getchar() != 'y') {
return (-1);
@@ -102,9 +102,9 @@
/* Point stdout to the file */
- file = acpi_os_open_file(pathname, ACPI_FILE_WRITING);
+ file = fopen(pathname, "w");
if (!file) {
- acpi_log_error("Could not open output file: %s\n", pathname);
+ fprintf(stderr, "Could not open output file: %s\n", pathname);
return (-1);
}
@@ -134,7 +134,7 @@
char filename[ACPI_NAME_SIZE + 16];
char instance_str[16];
ACPI_FILE file;
- size_t actual;
+ acpi_size actual;
u32 table_length;
/* Obtain table length */
@@ -158,37 +158,36 @@
/* Handle multiple SSDts - create different filenames for each */
if (instance > 0) {
- acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u",
- instance);
+ snprintf(instance_str, sizeof(instance_str), "%u", instance);
strcat(filename, instance_str);
}
strcat(filename, FILE_SUFFIX_BINARY_TABLE);
if (gbl_verbose_mode) {
- acpi_log_error
- ("Writing [%4.4s] to binary file: %s 0x%X (%u) bytes\n",
- table->signature, filename, table->length, table->length);
+ fprintf(stderr,
+ "Writing [%4.4s] to binary file: %s 0x%X (%u) bytes\n",
+ table->signature, filename, table->length,
+ table->length);
}
/* Open the file and dump the entire table in binary mode */
- file = acpi_os_open_file(filename,
- ACPI_FILE_WRITING | ACPI_FILE_BINARY);
+ file = fopen(filename, "wb");
if (!file) {
- acpi_log_error("Could not open output file: %s\n", filename);
+ fprintf(stderr, "Could not open output file: %s\n", filename);
return (-1);
}
- actual = acpi_os_write_file(file, table, 1, table_length);
+ actual = fwrite(table, 1, table_length, file);
if (actual != table_length) {
- acpi_log_error("Error writing binary output file: %s\n",
- filename);
- acpi_os_close_file(file);
+ fprintf(stderr, "Error writing binary output file: %s\n",
+ filename);
+ fclose(file);
return (-1);
}
- acpi_os_close_file(file);
+ fclose(file);
return (0);
}
@@ -211,14 +210,13 @@
struct acpi_table_header *buffer = NULL;
ACPI_FILE file;
u32 file_size;
- size_t actual;
+ acpi_size actual;
/* Must use binary mode */
- file =
- acpi_os_open_file(pathname, ACPI_FILE_READING | ACPI_FILE_BINARY);
+ file = fopen(pathname, "rb");
if (!file) {
- acpi_log_error("Could not open input file: %s\n", pathname);
+ fprintf(stderr, "Could not open input file: %s\n", pathname);
return (NULL);
}
@@ -226,7 +224,8 @@
file_size = cm_get_file_size(file);
if (file_size == ACPI_UINT32_MAX) {
- acpi_log_error("Could not get input file size: %s\n", pathname);
+ fprintf(stderr,
+ "Could not get input file size: %s\n", pathname);
goto cleanup;
}
@@ -234,16 +233,17 @@
buffer = ACPI_ALLOCATE_ZEROED(file_size);
if (!buffer) {
- acpi_log_error("Could not allocate file buffer of size: %u\n",
- file_size);
+ fprintf(stderr,
+ "Could not allocate file buffer of size: %u\n",
+ file_size);
goto cleanup;
}
/* Read the entire file */
- actual = acpi_os_read_file(file, buffer, 1, file_size);
+ actual = fread(buffer, 1, file_size, file);
if (actual != file_size) {
- acpi_log_error("Could not read input file: %s\n", pathname);
+ fprintf(stderr, "Could not read input file: %s\n", pathname);
ACPI_FREE(buffer);
buffer = NULL;
goto cleanup;
@@ -252,6 +252,6 @@
*out_file_size = file_size;
cleanup:
- acpi_os_close_file(file);
+ fclose(file);
return (buffer);
}
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 7692e6b..7ff46be 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -43,7 +43,6 @@
#define _DECLARE_GLOBALS
#include "acpidump.h"
-#include "acapps.h"
/*
* acpidump - A portable utility for obtaining system ACPI tables and dumping
@@ -140,8 +139,8 @@
current_action++;
if (current_action > AP_MAX_ACTIONS) {
- acpi_log_error("Too many table options (max %u)\n",
- AP_MAX_ACTIONS);
+ fprintf(stderr, "Too many table options (max %u)\n",
+ AP_MAX_ACTIONS);
return (-1);
}
@@ -186,9 +185,9 @@
} else if (!strcmp(acpi_gbl_optarg, "off")) {
gbl_dump_customized_tables = FALSE;
} else {
- acpi_log_error
- ("%s: Cannot handle this switch, please use on|off\n",
- acpi_gbl_optarg);
+ fprintf(stderr,
+ "%s: Cannot handle this switch, please use on|off\n",
+ acpi_gbl_optarg);
return (-1);
}
continue;
@@ -209,13 +208,13 @@
case 'r': /* Dump tables from specified RSDP */
status =
- acpi_ut_strtoul64(acpi_gbl_optarg, ACPI_ANY_BASE,
- ACPI_MAX64_BYTE_WIDTH,
+ acpi_ut_strtoul64(acpi_gbl_optarg,
+ ACPI_STRTOUL_64BIT,
&gbl_rsdp_base);
if (ACPI_FAILURE(status)) {
- acpi_log_error
- ("%s: Could not convert to a physical address\n",
- acpi_gbl_optarg);
+ fprintf(stderr,
+ "%s: Could not convert to a physical address\n",
+ acpi_gbl_optarg);
return (-1);
}
continue;
@@ -242,7 +241,7 @@
case 'z': /* Verbose mode */
gbl_verbose_mode = TRUE;
- acpi_log_error(ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
+ fprintf(stderr, ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
continue;
/*
@@ -315,6 +314,7 @@
ACPI_DEBUG_INITIALIZE(); /* For debug version only */
acpi_os_initialize();
gbl_output_file = ACPI_FILE_OUT;
+ acpi_gbl_integer_byte_width = 8;
/* Process command line options */
@@ -353,8 +353,9 @@
default:
- acpi_log_error("Internal error, invalid action: 0x%X\n",
- action->to_be_done);
+ fprintf(stderr,
+ "Internal error, invalid action: 0x%X\n",
+ action->to_be_done);
return (-1);
}
@@ -369,12 +370,12 @@
/* Summary for the output file */
file_size = cm_get_file_size(gbl_output_file);
- acpi_log_error
- ("Output file %s contains 0x%X (%u) bytes\n\n",
- gbl_output_filename, file_size, file_size);
+ fprintf(stderr,
+ "Output file %s contains 0x%X (%u) bytes\n\n",
+ gbl_output_filename, file_size, file_size);
}
- acpi_os_close_file(gbl_output_file);
+ fclose(gbl_output_file);
}
return (status);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index dd48f42..f64c57b 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -603,7 +603,8 @@
return -ENOMEM;
sprintf(t->label[i], "label%d", i);
- t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+ t->flush[i] = test_alloc(t, max(PAGE_SIZE,
+ sizeof(u64) * NUM_HINTS),
&t->flush_dma[i]);
if (!t->flush[i])
return -ENOMEM;
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 3b53046..9d0919ed 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,5 +1,5 @@
-CFLAGS += -I. -g -Wall -D_LGPL_SOURCE
+CFLAGS += -I. -g -O2 -Wall -D_LGPL_SOURCE
LDFLAGS += -lpthread -lurcu
TARGETS = main
OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
index 60a4045..7cf4121 100644
--- a/tools/testing/radix-tree/linux/cpu.h
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -7,19 +7,8 @@
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
- * not handling interrupts, soon dead.
- * Called on the dying cpu, interrupts
- * are already disabled. Must not
- * sleep, must not fail */
#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
* lock is dropped */
-#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running.
- * Called on the new cpu, just before
- * enabling interrupts. Must not sleep,
- * must not fail */
-#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached
- * idle loop. */
#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly,
* perhaps due to preemption. */
#define CPU_TASKS_FROZEN 0x0010
@@ -30,5 +19,3 @@
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
-#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN)
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 39d9b95..05d7bc4 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -124,6 +124,8 @@
unsigned long i;
unsigned long min = index & ~((1UL << order) - 1);
unsigned long max = min + (1UL << order);
+ void **slot;
+ struct item *item2 = item_create(min);
RADIX_TREE(tree, GFP_KERNEL);
printf("Multiorder index %ld, order %d\n", index, order);
@@ -139,13 +141,19 @@
item_check_absent(&tree, i);
for (i = max; i < 2*max; i++)
item_check_absent(&tree, i);
+ for (i = min; i < max; i++)
+ assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
+
+ slot = radix_tree_lookup_slot(&tree, index);
+ free(*slot);
+ radix_tree_replace_slot(slot, item2);
for (i = min; i < max; i++) {
- static void *entry = (void *)
- (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY);
- assert(radix_tree_insert(&tree, i, entry) == -EEXIST);
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == min);
}
- assert(item_delete(&tree, index) != 0);
+ assert(item_delete(&tree, min) != 0);
for (i = 0; i < 2*max; i++)
item_check_absent(&tree, i);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 4214567..b037ce9c 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -147,7 +147,7 @@
if (args.nr != getpid() ||
args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
- printf("[FAIL]\tgetpid() failed to preseve regs\n");
+ printf("[FAIL]\tgetpid() failed to preserve regs\n");
nerrs++;
} else {
printf("[OK]\tgetpid() preserves regs\n");
@@ -162,7 +162,7 @@
if (args.nr != 0 ||
args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
- printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n");
+ printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
nerrs++;
} else {
printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 8a577e7..246145b 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -106,7 +106,7 @@
".type int3, @function\n\t"
".align 4096\n\t"
"int3:\n\t"
- "mov %ss,%eax\n\t"
+ "mov %ss,%ecx\n\t"
"int3\n\t"
".size int3, . - int3\n\t"
".align 4096, 0xcc\n\t"
@@ -306,7 +306,7 @@
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
-# define REG_AX REG_RAX
+# define REG_CX REG_RCX
struct selectors {
unsigned short cs, gs, fs, ss;
@@ -326,7 +326,7 @@
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
-# define REG_AX REG_EAX
+# define REG_CX REG_ECX
static greg_t *ssptr(ucontext_t *ctx)
{
@@ -457,10 +457,10 @@
ctx->uc_mcontext.gregs[REG_IP] =
sig_cs == code16_sel ? 0 : (unsigned long)&int3;
ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
- ctx->uc_mcontext.gregs[REG_AX] = 0;
+ ctx->uc_mcontext.gregs[REG_CX] = 0;
memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
- requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */
+ requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
return;
}
@@ -482,7 +482,7 @@
unsigned short ss;
asm ("mov %%ss,%0" : "=r" (ss));
- greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+ greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
if (asm_ss != sig_ss && sig == SIGTRAP) {
/* Sanity check failure. */
printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
@@ -654,8 +654,8 @@
#endif
/* Sanity check on the kernel */
- if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
- printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+ if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+ printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
(unsigned long long)requested_regs[i],
(unsigned long long)resulting_regs[i]);
nerrs++;